0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * builtin-record.c
0004  *
0005  * Builtin record command: Record the profile of a workload
0006  * (or a CPU, or a PID) into the perf.data output file - for
0007  * later analysis via perf report.
0008  */
0009 #include "builtin.h"
0010 
0011 #include "util/build-id.h"
0012 #include <subcmd/parse-options.h>
0013 #include "util/parse-events.h"
0014 #include "util/config.h"
0015 
0016 #include "util/callchain.h"
0017 #include "util/cgroup.h"
0018 #include "util/header.h"
0019 #include "util/event.h"
0020 #include "util/evlist.h"
0021 #include "util/evsel.h"
0022 #include "util/debug.h"
0023 #include "util/mmap.h"
0024 #include "util/target.h"
0025 #include "util/session.h"
0026 #include "util/tool.h"
0027 #include "util/symbol.h"
0028 #include "util/record.h"
0029 #include "util/cpumap.h"
0030 #include "util/thread_map.h"
0031 #include "util/data.h"
0032 #include "util/perf_regs.h"
0033 #include "util/auxtrace.h"
0034 #include "util/tsc.h"
0035 #include "util/parse-branch-options.h"
0036 #include "util/parse-regs-options.h"
0037 #include "util/perf_api_probe.h"
0038 #include "util/llvm-utils.h"
0039 #include "util/bpf-loader.h"
0040 #include "util/trigger.h"
0041 #include "util/perf-hooks.h"
0042 #include "util/cpu-set-sched.h"
0043 #include "util/synthetic-events.h"
0044 #include "util/time-utils.h"
0045 #include "util/units.h"
0046 #include "util/bpf-event.h"
0047 #include "util/util.h"
0048 #include "util/pfm.h"
0049 #include "util/clockid.h"
0050 #include "util/pmu-hybrid.h"
0051 #include "util/evlist-hybrid.h"
0052 #include "util/off_cpu.h"
0053 #include "asm/bug.h"
0054 #include "perf.h"
0055 #include "cputopo.h"
0056 
0057 #include <errno.h>
0058 #include <inttypes.h>
0059 #include <locale.h>
0060 #include <poll.h>
0061 #include <pthread.h>
0062 #include <unistd.h>
0063 #ifndef HAVE_GETTID
0064 #include <syscall.h>
0065 #endif
0066 #include <sched.h>
0067 #include <signal.h>
0068 #ifdef HAVE_EVENTFD_SUPPORT
0069 #include <sys/eventfd.h>
0070 #endif
0071 #include <sys/mman.h>
0072 #include <sys/wait.h>
0073 #include <sys/types.h>
0074 #include <sys/stat.h>
0075 #include <fcntl.h>
0076 #include <linux/err.h>
0077 #include <linux/string.h>
0078 #include <linux/time64.h>
0079 #include <linux/zalloc.h>
0080 #include <linux/bitmap.h>
0081 #include <sys/time.h>
0082 
0083 struct switch_output {
0084     bool         enabled;
0085     bool         signal;
0086     unsigned long    size;
0087     unsigned long    time;
0088     const char  *str;
0089     bool         set;
0090     char         **filenames;
0091     int      num_files;
0092     int      cur_file;
0093 };
0094 
0095 struct thread_mask {
0096     struct mmap_cpu_mask    maps;
0097     struct mmap_cpu_mask    affinity;
0098 };
0099 
0100 struct record_thread {
0101     pid_t           tid;
0102     struct thread_mask  *mask;
0103     struct {
0104         int     msg[2];
0105         int     ack[2];
0106     } pipes;
0107     struct fdarray      pollfd;
0108     int         ctlfd_pos;
0109     int         nr_mmaps;
0110     struct mmap     **maps;
0111     struct mmap     **overwrite_maps;
0112     struct record       *rec;
0113     unsigned long long  samples;
0114     unsigned long       waking;
0115     u64         bytes_written;
0116     u64         bytes_transferred;
0117     u64         bytes_compressed;
0118 };
0119 
0120 static __thread struct record_thread *thread;
0121 
0122 enum thread_msg {
0123     THREAD_MSG__UNDEFINED = 0,
0124     THREAD_MSG__READY,
0125     THREAD_MSG__MAX,
0126 };
0127 
0128 static const char *thread_msg_tags[THREAD_MSG__MAX] = {
0129     "UNDEFINED", "READY"
0130 };
0131 
0132 enum thread_spec {
0133     THREAD_SPEC__UNDEFINED = 0,
0134     THREAD_SPEC__CPU,
0135     THREAD_SPEC__CORE,
0136     THREAD_SPEC__PACKAGE,
0137     THREAD_SPEC__NUMA,
0138     THREAD_SPEC__USER,
0139     THREAD_SPEC__MAX,
0140 };
0141 
0142 static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
0143     "undefined", "cpu", "core", "package", "numa", "user"
0144 };
0145 
0146 struct record {
0147     struct perf_tool    tool;
0148     struct record_opts  opts;
0149     u64         bytes_written;
0150     struct perf_data    data;
0151     struct auxtrace_record  *itr;
0152     struct evlist   *evlist;
0153     struct perf_session *session;
0154     struct evlist       *sb_evlist;
0155     pthread_t       thread_id;
0156     int         realtime_prio;
0157     bool            switch_output_event_set;
0158     bool            no_buildid;
0159     bool            no_buildid_set;
0160     bool            no_buildid_cache;
0161     bool            no_buildid_cache_set;
0162     bool            buildid_all;
0163     bool            buildid_mmap;
0164     bool            timestamp_filename;
0165     bool            timestamp_boundary;
0166     bool            off_cpu;
0167     struct switch_output    switch_output;
0168     unsigned long long  samples;
0169     unsigned long       output_max_size;    /* = 0: unlimited */
0170     struct perf_debuginfod  debuginfod;
0171     int         nr_threads;
0172     struct thread_mask  *thread_masks;
0173     struct record_thread    *thread_data;
0174 };
0175 
0176 static volatile int done;
0177 
0178 static volatile int auxtrace_record__snapshot_started;
0179 static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
0180 static DEFINE_TRIGGER(switch_output_trigger);
0181 
0182 static const char *affinity_tags[PERF_AFFINITY_MAX] = {
0183     "SYS", "NODE", "CPU"
0184 };
0185 
0186 #ifndef HAVE_GETTID
0187 static inline pid_t gettid(void)
0188 {
0189     return (pid_t)syscall(__NR_gettid);
0190 }
0191 #endif
0192 
0193 static int record__threads_enabled(struct record *rec)
0194 {
0195     return rec->opts.threads_spec;
0196 }
0197 
0198 static bool switch_output_signal(struct record *rec)
0199 {
0200     return rec->switch_output.signal &&
0201            trigger_is_ready(&switch_output_trigger);
0202 }
0203 
0204 static bool switch_output_size(struct record *rec)
0205 {
0206     return rec->switch_output.size &&
0207            trigger_is_ready(&switch_output_trigger) &&
0208            (rec->bytes_written >= rec->switch_output.size);
0209 }
0210 
0211 static bool switch_output_time(struct record *rec)
0212 {
0213     return rec->switch_output.time &&
0214            trigger_is_ready(&switch_output_trigger);
0215 }
0216 
0217 static u64 record__bytes_written(struct record *rec)
0218 {
0219     int t;
0220     u64 bytes_written = rec->bytes_written;
0221     struct record_thread *thread_data = rec->thread_data;
0222 
0223     for (t = 0; t < rec->nr_threads; t++)
0224         bytes_written += thread_data[t].bytes_written;
0225 
0226     return bytes_written;
0227 }
0228 
0229 static bool record__output_max_size_exceeded(struct record *rec)
0230 {
0231     return rec->output_max_size &&
0232            (record__bytes_written(rec) >= rec->output_max_size);
0233 }
0234 
0235 static int record__write(struct record *rec, struct mmap *map __maybe_unused,
0236              void *bf, size_t size)
0237 {
0238     struct perf_data_file *file = &rec->session->data->file;
0239 
0240     if (map && map->file)
0241         file = map->file;
0242 
0243     if (perf_data_file__write(file, bf, size) < 0) {
0244         pr_err("failed to write perf data, error: %m\n");
0245         return -1;
0246     }
0247 
0248     if (map && map->file)
0249         thread->bytes_written += size;
0250     else
0251         rec->bytes_written += size;
0252 
0253     if (record__output_max_size_exceeded(rec) && !done) {
0254         fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
0255                 " stopping session ]\n",
0256                 record__bytes_written(rec) >> 10);
0257         done = 1;
0258     }
0259 
0260     if (switch_output_size(rec))
0261         trigger_hit(&switch_output_trigger);
0262 
0263     return 0;
0264 }
0265 
0266 static int record__aio_enabled(struct record *rec);
0267 static int record__comp_enabled(struct record *rec);
0268 static size_t zstd_compress(struct perf_session *session, struct mmap *map,
0269                 void *dst, size_t dst_size, void *src, size_t src_size);
0270 
0271 #ifdef HAVE_AIO_SUPPORT
0272 static int record__aio_write(struct aiocb *cblock, int trace_fd,
0273         void *buf, size_t size, off_t off)
0274 {
0275     int rc;
0276 
0277     cblock->aio_fildes = trace_fd;
0278     cblock->aio_buf    = buf;
0279     cblock->aio_nbytes = size;
0280     cblock->aio_offset = off;
0281     cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
0282 
0283     do {
0284         rc = aio_write(cblock);
0285         if (rc == 0) {
0286             break;
0287         } else if (errno != EAGAIN) {
0288             cblock->aio_fildes = -1;
0289             pr_err("failed to queue perf data, error: %m\n");
0290             break;
0291         }
0292     } while (1);
0293 
0294     return rc;
0295 }
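
/*
 * A minimal standalone sketch (not from builtin-record.c) of the bare POSIX
 * AIO pattern record__aio_write() builds on: queue one asynchronous write
 * with aio_write(), poll its status with aio_error() and collect the result
 * with aio_return(). The example_* names and the file path are illustrative
 * only; link with -lrt on glibc.
 */
#include <aio.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int example_aio_write_once(const char *path, void *buf, size_t size)
{
	struct aiocb cb;
	int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0644);

	if (fd < 0)
		return -1;

	memset(&cb, 0, sizeof(cb));
	cb.aio_fildes = fd;
	cb.aio_buf    = buf;
	cb.aio_nbytes = size;
	cb.aio_offset = 0;
	cb.aio_sigevent.sigev_notify = SIGEV_NONE;

	if (aio_write(&cb)) {
		close(fd);
		return -1;
	}

	/* Poll for completion; record__aio_sync() uses aio_suspend() instead. */
	while (aio_error(&cb) == EINPROGRESS)
		usleep(1000);

	printf("wrote %zd of %zu bytes\n", aio_return(&cb), size);
	close(fd);
	return 0;
}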
0296 
0297 static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
0298 {
0299     void *rem_buf;
0300     off_t rem_off;
0301     size_t rem_size;
0302     int rc, aio_errno;
0303     ssize_t aio_ret, written;
0304 
0305     aio_errno = aio_error(cblock);
0306     if (aio_errno == EINPROGRESS)
0307         return 0;
0308 
0309     written = aio_ret = aio_return(cblock);
0310     if (aio_ret < 0) {
0311         if (aio_errno != EINTR)
0312             pr_err("failed to write perf data, error: %m\n");
0313         written = 0;
0314     }
0315 
0316     rem_size = cblock->aio_nbytes - written;
0317 
0318     if (rem_size == 0) {
0319         cblock->aio_fildes = -1;
0320         /*
0321          * md->refcount is incremented in record__aio_pushfn() for
0322          * every aio write request started in record__aio_push() so
0323          * decrement it because the request is now complete.
0324          */
0325         perf_mmap__put(&md->core);
0326         rc = 1;
0327     } else {
0328         /*
0329          * An aio write request may require a restart with
0330          * the remainder if the kernel didn't write the
0331          * whole chunk at once.
0332          */
0333         rem_off = cblock->aio_offset + written;
0334         rem_buf = (void *)(cblock->aio_buf + written);
0335         record__aio_write(cblock, cblock->aio_fildes,
0336                 rem_buf, rem_size, rem_off);
0337         rc = 0;
0338     }
0339 
0340     return rc;
0341 }
0342 
0343 static int record__aio_sync(struct mmap *md, bool sync_all)
0344 {
0345     struct aiocb **aiocb = md->aio.aiocb;
0346     struct aiocb *cblocks = md->aio.cblocks;
0347     struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
0348     int i, do_suspend;
0349 
0350     do {
0351         do_suspend = 0;
0352         for (i = 0; i < md->aio.nr_cblocks; ++i) {
0353             if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
0354                 if (sync_all)
0355                     aiocb[i] = NULL;
0356                 else
0357                     return i;
0358             } else {
0359                 /*
0360                  * The started aio write is not complete yet,
0361                  * so it has to be waited on before the
0362                  * next allocation.
0363                  */
0364                 aiocb[i] = &cblocks[i];
0365                 do_suspend = 1;
0366             }
0367         }
0368         if (!do_suspend)
0369             return -1;
0370 
0371         while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
0372             if (!(errno == EAGAIN || errno == EINTR))
0373                 pr_err("failed to sync perf data, error: %m\n");
0374         }
0375     } while (1);
0376 }
0377 
0378 struct record_aio {
0379     struct record   *rec;
0380     void        *data;
0381     size_t      size;
0382 };
0383 
0384 static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
0385 {
0386     struct record_aio *aio = to;
0387 
0388     /*
0389      * The map->core.base data pointed to by buf is copied into a free map->aio.data[]
0390      * buffer to release space in the kernel buffer as fast as possible, via the
0391      * perf_mmap__consume() call made from perf_mmap__push().
0392      *
0393      * That lets the kernel proceed with storing more profiling data into
0394      * the kernel buffer earlier than other per-cpu kernel buffers are handled.
0395      *
0396      * Copying can be done in two steps in case the chunk of profiling data
0397      * crosses the upper bound of the kernel buffer. In this case we first move
0398      * part of the data from map->start up to the upper bound and then the remainder
0399      * from the beginning of the kernel buffer to the end of the data chunk.
0400      */
0401 
0402     if (record__comp_enabled(aio->rec)) {
0403         size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
0404                      mmap__mmap_len(map) - aio->size,
0405                      buf, size);
0406     } else {
0407         memcpy(aio->data + aio->size, buf, size);
0408     }
0409 
0410     if (!aio->size) {
0411         /*
0412          * Increment map->refcount to guard map->aio.data[] buffer
0413          * from premature deallocation, because the map object can be
0414          * released before the aio write request started on the
0415          * map->aio.data[] buffer completes.
0416          *
0417          * perf_mmap__put() is done at record__aio_complete()
0418          * after started aio request completion or at record__aio_push()
0419          * if the request failed to start.
0420          */
0421         perf_mmap__get(&map->core);
0422     }
0423 
0424     aio->size += size;
0425 
0426     return size;
0427 }
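
/*
 * A minimal standalone sketch (not from builtin-record.c) of the two-step
 * copy described above: data that wraps past the end of a power-of-two
 * sized ring buffer is moved with at most two memcpy() calls, tail-to-end
 * first and then the remainder from the start of the buffer. The example_*
 * names are illustrative only.
 */
#include <string.h>

static void example_ring_copy(void *dst, const void *ring, size_t ring_size,
			      size_t head, size_t len)
{
	size_t off = head & (ring_size - 1);	/* ring_size must be 2^n */
	size_t first = ring_size - off;		/* bytes until the upper bound */

	if (len <= first) {
		memcpy(dst, (const char *)ring + off, len);
	} else {
		memcpy(dst, (const char *)ring + off, first);
		memcpy((char *)dst + first, ring, len - first);
	}
}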
0428 
0429 static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
0430 {
0431     int ret, idx;
0432     int trace_fd = rec->session->data->file.fd;
0433     struct record_aio aio = { .rec = rec, .size = 0 };
0434 
0435     /*
0436      * Call record__aio_sync() to wait till map->aio.data[] buffer
0437      * becomes available after previous aio write operation.
0438      */
0439 
0440     idx = record__aio_sync(map, false);
0441     aio.data = map->aio.data[idx];
0442     ret = perf_mmap__push(map, &aio, record__aio_pushfn);
0443     if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
0444         return ret;
0445 
0446     rec->samples++;
0447     ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
0448     if (!ret) {
0449         *off += aio.size;
0450         rec->bytes_written += aio.size;
0451         if (switch_output_size(rec))
0452             trigger_hit(&switch_output_trigger);
0453     } else {
0454         /*
0455          * Decrement map->refcount incremented in record__aio_pushfn()
0456          * back if record__aio_write() operation failed to start, otherwise
0457          * map->refcount is decremented in record__aio_complete() after
0458          * aio write operation finishes successfully.
0459          */
0460         perf_mmap__put(&map->core);
0461     }
0462 
0463     return ret;
0464 }
0465 
0466 static off_t record__aio_get_pos(int trace_fd)
0467 {
0468     return lseek(trace_fd, 0, SEEK_CUR);
0469 }
0470 
0471 static void record__aio_set_pos(int trace_fd, off_t pos)
0472 {
0473     lseek(trace_fd, pos, SEEK_SET);
0474 }
0475 
0476 static void record__aio_mmap_read_sync(struct record *rec)
0477 {
0478     int i;
0479     struct evlist *evlist = rec->evlist;
0480     struct mmap *maps = evlist->mmap;
0481 
0482     if (!record__aio_enabled(rec))
0483         return;
0484 
0485     for (i = 0; i < evlist->core.nr_mmaps; i++) {
0486         struct mmap *map = &maps[i];
0487 
0488         if (map->core.base)
0489             record__aio_sync(map, true);
0490     }
0491 }
0492 
0493 static int nr_cblocks_default = 1;
0494 static int nr_cblocks_max = 4;
0495 
0496 static int record__aio_parse(const struct option *opt,
0497                  const char *str,
0498                  int unset)
0499 {
0500     struct record_opts *opts = (struct record_opts *)opt->value;
0501 
0502     if (unset) {
0503         opts->nr_cblocks = 0;
0504     } else {
0505         if (str)
0506             opts->nr_cblocks = strtol(str, NULL, 0);
0507         if (!opts->nr_cblocks)
0508             opts->nr_cblocks = nr_cblocks_default;
0509     }
0510 
0511     return 0;
0512 }
0513 #else /* HAVE_AIO_SUPPORT */
0514 static int nr_cblocks_max = 0;
0515 
0516 static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
0517                 off_t *off __maybe_unused)
0518 {
0519     return -1;
0520 }
0521 
0522 static off_t record__aio_get_pos(int trace_fd __maybe_unused)
0523 {
0524     return -1;
0525 }
0526 
0527 static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
0528 {
0529 }
0530 
0531 static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
0532 {
0533 }
0534 #endif
0535 
0536 static int record__aio_enabled(struct record *rec)
0537 {
0538     return rec->opts.nr_cblocks > 0;
0539 }
0540 
0541 #define MMAP_FLUSH_DEFAULT 1
0542 static int record__mmap_flush_parse(const struct option *opt,
0543                     const char *str,
0544                     int unset)
0545 {
0546     int flush_max;
0547     struct record_opts *opts = (struct record_opts *)opt->value;
0548     static struct parse_tag tags[] = {
0549             { .tag  = 'B', .mult = 1       },
0550             { .tag  = 'K', .mult = 1 << 10 },
0551             { .tag  = 'M', .mult = 1 << 20 },
0552             { .tag  = 'G', .mult = 1 << 30 },
0553             { .tag  = 0 },
0554     };
0555 
0556     if (unset)
0557         return 0;
0558 
0559     if (str) {
0560         opts->mmap_flush = parse_tag_value(str, tags);
0561         if (opts->mmap_flush == (int)-1)
0562             opts->mmap_flush = strtol(str, NULL, 0);
0563     }
0564 
0565     if (!opts->mmap_flush)
0566         opts->mmap_flush = MMAP_FLUSH_DEFAULT;
0567 
0568     flush_max = evlist__mmap_size(opts->mmap_pages);
0569     flush_max /= 4;
0570     if (opts->mmap_flush > flush_max)
0571         opts->mmap_flush = flush_max;
0572 
0573     return 0;
0574 }
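
/*
 * A minimal standalone sketch (not from builtin-record.c) of the size
 * parsing the option above accepts: a number with an optional B/K/M/G
 * suffix, falling back to a plain strtol() value. parse_tag_value() above
 * does the table-driven version of this; example_parse_size() is only an
 * illustration.
 */
#include <stdlib.h>

static long example_parse_size(const char *str)
{
	char *end = NULL;
	long val = strtol(str, &end, 0);

	switch (end && *end ? *end : 'B') {
	case 'G': val <<= 10; /* fall through */
	case 'M': val <<= 10; /* fall through */
	case 'K': val <<= 10; /* fall through */
	case 'B': break;
	default:  return -1;	/* unknown suffix */
	}
	return val;
}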
0575 
0576 #ifdef HAVE_ZSTD_SUPPORT
0577 static unsigned int comp_level_default = 1;
0578 
0579 static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
0580 {
0581     struct record_opts *opts = opt->value;
0582 
0583     if (unset) {
0584         opts->comp_level = 0;
0585     } else {
0586         if (str)
0587             opts->comp_level = strtol(str, NULL, 0);
0588         if (!opts->comp_level)
0589             opts->comp_level = comp_level_default;
0590     }
0591 
0592     return 0;
0593 }
0594 #endif
0595 static unsigned int comp_level_max = 22;
0596 
0597 static int record__comp_enabled(struct record *rec)
0598 {
0599     return rec->opts.comp_level > 0;
0600 }
0601 
0602 static int process_synthesized_event(struct perf_tool *tool,
0603                      union perf_event *event,
0604                      struct perf_sample *sample __maybe_unused,
0605                      struct machine *machine __maybe_unused)
0606 {
0607     struct record *rec = container_of(tool, struct record, tool);
0608     return record__write(rec, NULL, event, event->header.size);
0609 }
0610 
0611 static int process_locked_synthesized_event(struct perf_tool *tool,
0612                      union perf_event *event,
0613                      struct perf_sample *sample __maybe_unused,
0614                      struct machine *machine __maybe_unused)
0615 {
0616     static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
0617     int ret;
0618 
0619     pthread_mutex_lock(&synth_lock);
0620     ret = process_synthesized_event(tool, event, sample, machine);
0621     pthread_mutex_unlock(&synth_lock);
0622     return ret;
0623 }
0624 
0625 static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
0626 {
0627     struct record *rec = to;
0628 
0629     if (record__comp_enabled(rec)) {
0630         size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size);
0631         bf   = map->data;
0632     }
0633 
0634     thread->samples++;
0635     return record__write(rec, map, bf, size);
0636 }
0637 
0638 static volatile int signr = -1;
0639 static volatile int child_finished;
0640 #ifdef HAVE_EVENTFD_SUPPORT
0641 static int done_fd = -1;
0642 #endif
0643 
0644 static void sig_handler(int sig)
0645 {
0646     if (sig == SIGCHLD)
0647         child_finished = 1;
0648     else
0649         signr = sig;
0650 
0651     done = 1;
0652 #ifdef HAVE_EVENTFD_SUPPORT
0653 {
0654     u64 tmp = 1;
0655     /*
0656      * It is possible for this signal handler to run after done is checked
0657      * in the main loop, but before the perf counter fds are polled. If this
0658      * happens, the poll() will continue to wait even though done is set,
0659      * and will only break out if either another signal is received, or the
0660      * counters are ready for read. To ensure the poll() doesn't sleep when
0661      * done is set, use an eventfd (done_fd) to wake up the poll().
0662      */
0663     if (write(done_fd, &tmp, sizeof(tmp)) < 0)
0664         pr_err("failed to signal wakeup fd, error: %m\n");
0665 }
0666 #endif // HAVE_EVENTFD_SUPPORT
0667 }
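
/*
 * A minimal standalone sketch (not from builtin-record.c) of the eventfd
 * wake-up trick described in the comment above: the signal handler writes
 * to an eventfd that sits in the poll() set, so poll() returns promptly
 * even if the signal lands just before the call. The example_* names are
 * illustrative only.
 */
#include <poll.h>
#include <signal.h>
#include <stdint.h>
#include <sys/eventfd.h>
#include <unistd.h>

static int example_wakeup_fd = -1;
static volatile sig_atomic_t example_stop;

static void example_handler(int sig)
{
	uint64_t one = 1;
	ssize_t n;

	(void)sig;
	example_stop = 1;
	/* write(2) is async-signal-safe; nothing useful to do on failure. */
	n = write(example_wakeup_fd, &one, sizeof(one));
	(void)n;
}

static void example_wait_for_signal(void)
{
	struct pollfd pfd;

	example_wakeup_fd = eventfd(0, 0);
	signal(SIGINT, example_handler);

	pfd.fd = example_wakeup_fd;
	pfd.events = POLLIN;

	while (!example_stop)
		poll(&pfd, 1, -1);	/* wakes as soon as the handler writes */

	close(example_wakeup_fd);
}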
0668 
0669 static void sigsegv_handler(int sig)
0670 {
0671     perf_hooks__recover();
0672     sighandler_dump_stack(sig);
0673 }
0674 
0675 static void record__sig_exit(void)
0676 {
0677     if (signr == -1)
0678         return;
0679 
0680     signal(signr, SIG_DFL);
0681     raise(signr);
0682 }
0683 
0684 #ifdef HAVE_AUXTRACE_SUPPORT
0685 
0686 static int record__process_auxtrace(struct perf_tool *tool,
0687                     struct mmap *map,
0688                     union perf_event *event, void *data1,
0689                     size_t len1, void *data2, size_t len2)
0690 {
0691     struct record *rec = container_of(tool, struct record, tool);
0692     struct perf_data *data = &rec->data;
0693     size_t padding;
0694     u8 pad[8] = {0};
0695 
0696     if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
0697         off_t file_offset;
0698         int fd = perf_data__fd(data);
0699         int err;
0700 
0701         file_offset = lseek(fd, 0, SEEK_CUR);
0702         if (file_offset == -1)
0703             return -1;
0704         err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
0705                              event, file_offset);
0706         if (err)
0707             return err;
0708     }
0709 
0710     /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
0711     padding = (len1 + len2) & 7;
0712     if (padding)
0713         padding = 8 - padding;
0714 
0715     record__write(rec, map, event, event->header.size);
0716     record__write(rec, map, data1, len1);
0717     if (len2)
0718         record__write(rec, map, data2, len2);
0719     record__write(rec, map, &pad, padding);
0720 
0721     return 0;
0722 }
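
/*
 * The padding math above rounds the payload up to the next 8-byte boundary,
 * e.g. len1 + len2 = 13 gives padding = 3 so 16 bytes reach the file. A
 * branch-free equivalent (illustrative helper, not used by this file):
 */
#include <stddef.h>

static inline size_t example_pad_to_8(size_t len)
{
	return (8 - (len & 7)) & 7;	/* 0 when len is already 8-byte aligned */
}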
0723 
0724 static int record__auxtrace_mmap_read(struct record *rec,
0725                       struct mmap *map)
0726 {
0727     int ret;
0728 
0729     ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
0730                   record__process_auxtrace);
0731     if (ret < 0)
0732         return ret;
0733 
0734     if (ret)
0735         rec->samples++;
0736 
0737     return 0;
0738 }
0739 
0740 static int record__auxtrace_mmap_read_snapshot(struct record *rec,
0741                            struct mmap *map)
0742 {
0743     int ret;
0744 
0745     ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
0746                        record__process_auxtrace,
0747                        rec->opts.auxtrace_snapshot_size);
0748     if (ret < 0)
0749         return ret;
0750 
0751     if (ret)
0752         rec->samples++;
0753 
0754     return 0;
0755 }
0756 
0757 static int record__auxtrace_read_snapshot_all(struct record *rec)
0758 {
0759     int i;
0760     int rc = 0;
0761 
0762     for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
0763         struct mmap *map = &rec->evlist->mmap[i];
0764 
0765         if (!map->auxtrace_mmap.base)
0766             continue;
0767 
0768         if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
0769             rc = -1;
0770             goto out;
0771         }
0772     }
0773 out:
0774     return rc;
0775 }
0776 
0777 static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
0778 {
0779     pr_debug("Recording AUX area tracing snapshot\n");
0780     if (record__auxtrace_read_snapshot_all(rec) < 0) {
0781         trigger_error(&auxtrace_snapshot_trigger);
0782     } else {
0783         if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
0784             trigger_error(&auxtrace_snapshot_trigger);
0785         else
0786             trigger_ready(&auxtrace_snapshot_trigger);
0787     }
0788 }
0789 
0790 static int record__auxtrace_snapshot_exit(struct record *rec)
0791 {
0792     if (trigger_is_error(&auxtrace_snapshot_trigger))
0793         return 0;
0794 
0795     if (!auxtrace_record__snapshot_started &&
0796         auxtrace_record__snapshot_start(rec->itr))
0797         return -1;
0798 
0799     record__read_auxtrace_snapshot(rec, true);
0800     if (trigger_is_error(&auxtrace_snapshot_trigger))
0801         return -1;
0802 
0803     return 0;
0804 }
0805 
0806 static int record__auxtrace_init(struct record *rec)
0807 {
0808     int err;
0809 
0810     if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
0811         && record__threads_enabled(rec)) {
0812         pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
0813         return -EINVAL;
0814     }
0815 
0816     if (!rec->itr) {
0817         rec->itr = auxtrace_record__init(rec->evlist, &err);
0818         if (err)
0819             return err;
0820     }
0821 
0822     err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
0823                           rec->opts.auxtrace_snapshot_opts);
0824     if (err)
0825         return err;
0826 
0827     err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
0828                         rec->opts.auxtrace_sample_opts);
0829     if (err)
0830         return err;
0831 
0832     auxtrace_regroup_aux_output(rec->evlist);
0833 
0834     return auxtrace_parse_filters(rec->evlist);
0835 }
0836 
0837 #else
0838 
0839 static inline
0840 int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
0841                    struct mmap *map __maybe_unused)
0842 {
0843     return 0;
0844 }
0845 
0846 static inline
0847 void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
0848                     bool on_exit __maybe_unused)
0849 {
0850 }
0851 
0852 static inline
0853 int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
0854 {
0855     return 0;
0856 }
0857 
0858 static inline
0859 int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
0860 {
0861     return 0;
0862 }
0863 
0864 static int record__auxtrace_init(struct record *rec __maybe_unused)
0865 {
0866     return 0;
0867 }
0868 
0869 #endif
0870 
0871 static int record__config_text_poke(struct evlist *evlist)
0872 {
0873     struct evsel *evsel;
0874 
0875     /* Nothing to do if text poke is already configured */
0876     evlist__for_each_entry(evlist, evsel) {
0877         if (evsel->core.attr.text_poke)
0878             return 0;
0879     }
0880 
0881     evsel = evlist__add_dummy_on_all_cpus(evlist);
0882     if (!evsel)
0883         return -ENOMEM;
0884 
0885     evsel->core.attr.text_poke = 1;
0886     evsel->core.attr.ksymbol = 1;
0887     evsel->immediate = true;
0888     evsel__set_sample_bit(evsel, TIME);
0889 
0890     return 0;
0891 }
0892 
0893 static int record__config_off_cpu(struct record *rec)
0894 {
0895     return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
0896 }
0897 
0898 static bool record__kcore_readable(struct machine *machine)
0899 {
0900     char kcore[PATH_MAX];
0901     int fd;
0902 
0903     scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
0904 
0905     fd = open(kcore, O_RDONLY);
0906     if (fd < 0)
0907         return false;
0908 
0909     close(fd);
0910 
0911     return true;
0912 }
0913 
0914 static int record__kcore_copy(struct machine *machine, struct perf_data *data)
0915 {
0916     char from_dir[PATH_MAX];
0917     char kcore_dir[PATH_MAX];
0918     int ret;
0919 
0920     snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
0921 
0922     ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
0923     if (ret)
0924         return ret;
0925 
0926     return kcore_copy(from_dir, kcore_dir);
0927 }
0928 
0929 static void record__thread_data_init_pipes(struct record_thread *thread_data)
0930 {
0931     thread_data->pipes.msg[0] = -1;
0932     thread_data->pipes.msg[1] = -1;
0933     thread_data->pipes.ack[0] = -1;
0934     thread_data->pipes.ack[1] = -1;
0935 }
0936 
0937 static int record__thread_data_open_pipes(struct record_thread *thread_data)
0938 {
0939     if (pipe(thread_data->pipes.msg))
0940         return -EINVAL;
0941 
0942     if (pipe(thread_data->pipes.ack)) {
0943         close(thread_data->pipes.msg[0]);
0944         thread_data->pipes.msg[0] = -1;
0945         close(thread_data->pipes.msg[1]);
0946         thread_data->pipes.msg[1] = -1;
0947         return -EINVAL;
0948     }
0949 
0950     pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
0951          thread_data->pipes.msg[0], thread_data->pipes.msg[1],
0952          thread_data->pipes.ack[0], thread_data->pipes.ack[1]);
0953 
0954     return 0;
0955 }
0956 
0957 static void record__thread_data_close_pipes(struct record_thread *thread_data)
0958 {
0959     if (thread_data->pipes.msg[0] != -1) {
0960         close(thread_data->pipes.msg[0]);
0961         thread_data->pipes.msg[0] = -1;
0962     }
0963     if (thread_data->pipes.msg[1] != -1) {
0964         close(thread_data->pipes.msg[1]);
0965         thread_data->pipes.msg[1] = -1;
0966     }
0967     if (thread_data->pipes.ack[0] != -1) {
0968         close(thread_data->pipes.ack[0]);
0969         thread_data->pipes.ack[0] = -1;
0970     }
0971     if (thread_data->pipes.ack[1] != -1) {
0972         close(thread_data->pipes.ack[1]);
0973         thread_data->pipes.ack[1] = -1;
0974     }
0975 }
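
/*
 * A minimal standalone sketch (not from builtin-record.c) of the handshake
 * the msg/ack pipe pair above enables: the worker writes a one-byte "ready"
 * token on the ack pipe once it is up, and learns it should stop when the
 * msg pipe is closed (read() returning 0, or POLLHUP in a poll set). The
 * example_* names are illustrative; error-path cleanup is trimmed for
 * brevity.
 */
#include <pthread.h>
#include <unistd.h>

static int example_msg[2], example_ack[2];

static void *example_worker(void *arg)
{
	char token = 'R', cmd;

	(void)arg;
	if (write(example_ack[1], &token, 1) != 1)
		return NULL;			/* parent cannot hear us */
	while (read(example_msg[0], &cmd, 1) == 1)
		;				/* 0 means the pipe was closed */
	return NULL;
}

static int example_handshake(void)
{
	pthread_t tid;
	char token;

	if (pipe(example_msg) || pipe(example_ack))
		return -1;
	if (pthread_create(&tid, NULL, example_worker, NULL))
		return -1;

	if (read(example_ack[0], &token, 1) != 1)	/* wait for "ready" */
		return -1;
	close(example_msg[1]);				/* tells the worker to stop */
	pthread_join(tid, NULL);
	return 0;
}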
0976 
0977 static bool evlist__per_thread(struct evlist *evlist)
0978 {
0979     return cpu_map__is_dummy(evlist->core.user_requested_cpus);
0980 }
0981 
0982 static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
0983 {
0984     int m, tm, nr_mmaps = evlist->core.nr_mmaps;
0985     struct mmap *mmap = evlist->mmap;
0986     struct mmap *overwrite_mmap = evlist->overwrite_mmap;
0987     struct perf_cpu_map *cpus = evlist->core.all_cpus;
0988     bool per_thread = evlist__per_thread(evlist);
0989 
0990     if (per_thread)
0991         thread_data->nr_mmaps = nr_mmaps;
0992     else
0993         thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
0994                               thread_data->mask->maps.nbits);
0995     if (mmap) {
0996         thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
0997         if (!thread_data->maps)
0998             return -ENOMEM;
0999     }
1000     if (overwrite_mmap) {
1001         thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1002         if (!thread_data->overwrite_maps) {
1003             zfree(&thread_data->maps);
1004             return -ENOMEM;
1005         }
1006     }
1007     pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
1008          thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
1009 
1010     for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
1011         if (per_thread ||
1012             test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
1013             if (thread_data->maps) {
1014                 thread_data->maps[tm] = &mmap[m];
1015                 pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
1016                       thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1017             }
1018             if (thread_data->overwrite_maps) {
1019                 thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
1020                 pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
1021                       thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1022             }
1023             tm++;
1024         }
1025     }
1026 
1027     return 0;
1028 }
1029 
1030 static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
1031 {
1032     int f, tm, pos;
1033     struct mmap *map, *overwrite_map;
1034 
1035     fdarray__init(&thread_data->pollfd, 64);
1036 
1037     for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
1038         map = thread_data->maps ? thread_data->maps[tm] : NULL;
1039         overwrite_map = thread_data->overwrite_maps ?
1040                 thread_data->overwrite_maps[tm] : NULL;
1041 
1042         for (f = 0; f < evlist->core.pollfd.nr; f++) {
1043             void *ptr = evlist->core.pollfd.priv[f].ptr;
1044 
1045             if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
1046                 pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
1047                                   &evlist->core.pollfd);
1048                 if (pos < 0)
1049                     return pos;
1050                 pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
1051                      thread_data, pos, evlist->core.pollfd.entries[f].fd);
1052             }
1053         }
1054     }
1055 
1056     return 0;
1057 }
1058 
1059 static void record__free_thread_data(struct record *rec)
1060 {
1061     int t;
1062     struct record_thread *thread_data = rec->thread_data;
1063 
1064     if (thread_data == NULL)
1065         return;
1066 
1067     for (t = 0; t < rec->nr_threads; t++) {
1068         record__thread_data_close_pipes(&thread_data[t]);
1069         zfree(&thread_data[t].maps);
1070         zfree(&thread_data[t].overwrite_maps);
1071         fdarray__exit(&thread_data[t].pollfd);
1072     }
1073 
1074     zfree(&rec->thread_data);
1075 }
1076 
1077 static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
1078 {
1079     int t, ret;
1080     struct record_thread *thread_data;
1081 
1082     rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
1083     if (!rec->thread_data) {
1084         pr_err("Failed to allocate thread data\n");
1085         return -ENOMEM;
1086     }
1087     thread_data = rec->thread_data;
1088 
1089     for (t = 0; t < rec->nr_threads; t++)
1090         record__thread_data_init_pipes(&thread_data[t]);
1091 
1092     for (t = 0; t < rec->nr_threads; t++) {
1093         thread_data[t].rec = rec;
1094         thread_data[t].mask = &rec->thread_masks[t];
1095         ret = record__thread_data_init_maps(&thread_data[t], evlist);
1096         if (ret) {
1097             pr_err("Failed to initialize thread[%d] maps\n", t);
1098             goto out_free;
1099         }
1100         ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
1101         if (ret) {
1102             pr_err("Failed to initialize thread[%d] pollfd\n", t);
1103             goto out_free;
1104         }
1105         if (t) {
1106             thread_data[t].tid = -1;
1107             ret = record__thread_data_open_pipes(&thread_data[t]);
1108             if (ret) {
1109                 pr_err("Failed to open thread[%d] communication pipes\n", t);
1110                 goto out_free;
1111             }
1112             ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
1113                        POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
1114             if (ret < 0) {
1115                 pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
1116                 goto out_free;
1117             }
1118             thread_data[t].ctlfd_pos = ret;
1119             pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1120                  thread_data, thread_data[t].ctlfd_pos,
1121                  thread_data[t].pipes.msg[0]);
1122         } else {
1123             thread_data[t].tid = gettid();
1124             if (evlist->ctl_fd.pos == -1)
1125                 continue;
1126             ret = fdarray__dup_entry_from(&thread_data[t].pollfd, evlist->ctl_fd.pos,
1127                               &evlist->core.pollfd);
1128             if (ret < 0) {
1129                 pr_err("Failed to duplicate descriptor in main thread pollfd\n");
1130                 goto out_free;
1131             }
1132             thread_data[t].ctlfd_pos = ret;
1133             pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1134                  thread_data, thread_data[t].ctlfd_pos,
1135                  evlist->core.pollfd.entries[evlist->ctl_fd.pos].fd);
1136         }
1137     }
1138 
1139     return 0;
1140 
1141 out_free:
1142     record__free_thread_data(rec);
1143 
1144     return ret;
1145 }
1146 
1147 static int record__mmap_evlist(struct record *rec,
1148                    struct evlist *evlist)
1149 {
1150     int i, ret;
1151     struct record_opts *opts = &rec->opts;
1152     bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
1153                   opts->auxtrace_sample_mode;
1154     char msg[512];
1155 
1156     if (opts->affinity != PERF_AFFINITY_SYS)
1157         cpu__setup_cpunode_map();
1158 
1159     if (evlist__mmap_ex(evlist, opts->mmap_pages,
1160                  opts->auxtrace_mmap_pages,
1161                  auxtrace_overwrite,
1162                  opts->nr_cblocks, opts->affinity,
1163                  opts->mmap_flush, opts->comp_level) < 0) {
1164         if (errno == EPERM) {
1165             pr_err("Permission error mapping pages.\n"
1166                    "Consider increasing "
1167                    "/proc/sys/kernel/perf_event_mlock_kb,\n"
1168                    "or try again with a smaller value of -m/--mmap_pages.\n"
1169                    "(current value: %u,%u)\n",
1170                    opts->mmap_pages, opts->auxtrace_mmap_pages);
1171             return -errno;
1172         } else {
1173             pr_err("failed to mmap with %d (%s)\n", errno,
1174                 str_error_r(errno, msg, sizeof(msg)));
1175             if (errno)
1176                 return -errno;
1177             else
1178                 return -EINVAL;
1179         }
1180     }
1181 
1182     if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
1183         return -1;
1184 
1185     ret = record__alloc_thread_data(rec, evlist);
1186     if (ret)
1187         return ret;
1188 
1189     if (record__threads_enabled(rec)) {
1190         ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
1191         if (ret) {
1192             pr_err("Failed to create data directory: %s\n", strerror(-ret));
1193             return ret;
1194         }
1195         for (i = 0; i < evlist->core.nr_mmaps; i++) {
1196             if (evlist->mmap)
1197                 evlist->mmap[i].file = &rec->data.dir.files[i];
1198             if (evlist->overwrite_mmap)
1199                 evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
1200         }
1201     }
1202 
1203     return 0;
1204 }
1205 
1206 static int record__mmap(struct record *rec)
1207 {
1208     return record__mmap_evlist(rec, rec->evlist);
1209 }
1210 
1211 static int record__open(struct record *rec)
1212 {
1213     char msg[BUFSIZ];
1214     struct evsel *pos;
1215     struct evlist *evlist = rec->evlist;
1216     struct perf_session *session = rec->session;
1217     struct record_opts *opts = &rec->opts;
1218     int rc = 0;
1219 
1220     /*
1221      * For initial_delay, system wide or a hybrid system, we need to add a
1222      * dummy event so that we can track PERF_RECORD_MMAP to cover the delay
1223      * of waiting or event synthesis.
1224      */
1225     if (opts->initial_delay || target__has_cpu(&opts->target) ||
1226         perf_pmu__has_hybrid()) {
1227         pos = evlist__get_tracking_event(evlist);
1228         if (!evsel__is_dummy_event(pos)) {
1229             /* Set up dummy event. */
1230             if (evlist__add_dummy(evlist))
1231                 return -ENOMEM;
1232             pos = evlist__last(evlist);
1233             evlist__set_tracking_event(evlist, pos);
1234         }
1235 
1236         /*
1237          * Enable the dummy event when the process is forked for
1238          * initial_delay, immediately for system wide.
1239          */
1240         if (opts->initial_delay && !pos->immediate &&
1241             !target__has_cpu(&opts->target))
1242             pos->core.attr.enable_on_exec = 1;
1243         else
1244             pos->immediate = 1;
1245     }
1246 
1247     evlist__config(evlist, opts, &callchain_param);
1248 
1249     evlist__for_each_entry(evlist, pos) {
1250 try_again:
1251         if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
1252             if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
1253                 if (verbose > 0)
1254                     ui__warning("%s\n", msg);
1255                 goto try_again;
1256             }
1257             if ((errno == EINVAL || errno == EBADF) &&
1258                 pos->core.leader != &pos->core &&
1259                 pos->weak_group) {
1260                     pos = evlist__reset_weak_group(evlist, pos, true);
1261                 goto try_again;
1262             }
1263             rc = -errno;
1264             evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
1265             ui__error("%s\n", msg);
1266             goto out;
1267         }
1268 
1269         pos->supported = true;
1270     }
1271 
1272     if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
1273         pr_warning(
1274 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1275 "check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1276 "Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1277 "file is not found in the buildid cache or in the vmlinux path.\n\n"
1278 "Samples in kernel modules won't be resolved at all.\n\n"
1279 "If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1280 "even with a suitable vmlinux or kallsyms file.\n\n");
1281     }
1282 
1283     if (evlist__apply_filters(evlist, &pos)) {
1284         pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
1285             pos->filter, evsel__name(pos), errno,
1286             str_error_r(errno, msg, sizeof(msg)));
1287         rc = -1;
1288         goto out;
1289     }
1290 
1291     rc = record__mmap(rec);
1292     if (rc)
1293         goto out;
1294 
1295     session->evlist = evlist;
1296     perf_session__set_id_hdr_size(session);
1297 out:
1298     return rc;
1299 }
1300 
1301 static void set_timestamp_boundary(struct record *rec, u64 sample_time)
1302 {
1303     if (rec->evlist->first_sample_time == 0)
1304         rec->evlist->first_sample_time = sample_time;
1305 
1306     if (sample_time)
1307         rec->evlist->last_sample_time = sample_time;
1308 }
1309 
1310 static int process_sample_event(struct perf_tool *tool,
1311                 union perf_event *event,
1312                 struct perf_sample *sample,
1313                 struct evsel *evsel,
1314                 struct machine *machine)
1315 {
1316     struct record *rec = container_of(tool, struct record, tool);
1317 
1318     set_timestamp_boundary(rec, sample->time);
1319 
1320     if (rec->buildid_all)
1321         return 0;
1322 
1323     rec->samples++;
1324     return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
1325 }
1326 
1327 static int process_buildids(struct record *rec)
1328 {
1329     struct perf_session *session = rec->session;
1330 
1331     if (perf_data__size(&rec->data) == 0)
1332         return 0;
1333 
1334     /*
1335      * During this process, it'll load kernel map and replace the
1336      * dso->long_name to a real pathname it found.  In this case
1337      * we prefer the vmlinux path like
1338      *   /lib/modules/3.16.4/build/vmlinux
1339      *
1340      * rather than build-id path (in debug directory).
1341      *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
1342      */
1343     symbol_conf.ignore_vmlinux_buildid = true;
1344 
1345     /*
1346      * If --buildid-all is given, it marks all DSO regardless of hits,
1347      * so no need to process samples. But if timestamp_boundary is enabled,
1348      * it still needs to walk on all samples to get the timestamps of
1349      * first/last samples.
1350      */
1351     if (rec->buildid_all && !rec->timestamp_boundary)
1352         rec->tool.sample = NULL;
1353 
1354     return perf_session__process_events(session);
1355 }
1356 
1357 static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
1358 {
1359     int err;
1360     struct perf_tool *tool = data;
1361     /*
1362      *As for guest kernel when processing subcommand record&report,
1363      *we arrange module mmap prior to guest kernel mmap and trigger
1364      *a preload dso because default guest module symbols are loaded
1365      *from guest kallsyms instead of /lib/modules/XXX/XXX. This
1366      *method is used to avoid symbol missing when the first addr is
1367      *in module instead of in guest kernel.
1368      */
1369     err = perf_event__synthesize_modules(tool, process_synthesized_event,
1370                          machine);
1371     if (err < 0)
1372         pr_err("Couldn't record guest kernel [%d]'s reference"
1373                " relocation symbol.\n", machine->pid);
1374 
1375     /*
1376      * We use _stext for guest kernel because guest kernel's /proc/kallsyms
1377      * have no _text sometimes.
1378      */
1379     err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1380                          machine);
1381     if (err < 0)
1382         pr_err("Couldn't record guest kernel [%d]'s reference"
1383                " relocation symbol.\n", machine->pid);
1384 }
1385 
1386 static struct perf_event_header finished_round_event = {
1387     .size = sizeof(struct perf_event_header),
1388     .type = PERF_RECORD_FINISHED_ROUND,
1389 };
1390 
1391 static struct perf_event_header finished_init_event = {
1392     .size = sizeof(struct perf_event_header),
1393     .type = PERF_RECORD_FINISHED_INIT,
1394 };
1395 
1396 static void record__adjust_affinity(struct record *rec, struct mmap *map)
1397 {
1398     if (rec->opts.affinity != PERF_AFFINITY_SYS &&
1399         !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
1400               thread->mask->affinity.nbits)) {
1401         bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
1402         bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
1403               map->affinity_mask.bits, thread->mask->affinity.nbits);
1404         sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
1405                     (cpu_set_t *)thread->mask->affinity.bits);
1406         if (verbose == 2) {
1407             pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
1408             mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
1409         }
1410     }
1411 }
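
/*
 * A minimal standalone sketch (not from builtin-record.c) of the affinity
 * switch performed above, using the plain libc cpu_set_t interface to pin
 * the calling thread to a single CPU. Requires _GNU_SOURCE for CPU_ZERO(),
 * CPU_SET() and sched_setaffinity(); example_pin_to_cpu() is illustrative.
 */
#define _GNU_SOURCE
#include <sched.h>

static int example_pin_to_cpu(int cpu)
{
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(cpu, &set);

	/* pid 0 means "the calling thread", as in record__adjust_affinity(). */
	return sched_setaffinity(0, sizeof(set), &set);
}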
1412 
1413 static size_t process_comp_header(void *record, size_t increment)
1414 {
1415     struct perf_record_compressed *event = record;
1416     size_t size = sizeof(*event);
1417 
1418     if (increment) {
1419         event->header.size += increment;
1420         return increment;
1421     }
1422 
1423     event->header.type = PERF_RECORD_COMPRESSED;
1424     event->header.size = size;
1425 
1426     return size;
1427 }
1428 
1429 static size_t zstd_compress(struct perf_session *session, struct mmap *map,
1430                 void *dst, size_t dst_size, void *src, size_t src_size)
1431 {
1432     size_t compressed;
1433     size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
1434     struct zstd_data *zstd_data = &session->zstd_data;
1435 
1436     if (map && map->file)
1437         zstd_data = &map->zstd_data;
1438 
1439     compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
1440                              max_record_size, process_comp_header);
1441 
1442     if (map && map->file) {
1443         thread->bytes_transferred += src_size;
1444         thread->bytes_compressed  += compressed;
1445     } else {
1446         session->bytes_transferred += src_size;
1447         session->bytes_compressed  += compressed;
1448     }
1449 
1450     return compressed;
1451 }
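
/*
 * A minimal standalone sketch (not from builtin-record.c): zstd_compress()
 * above goes through perf's zstd_data streaming helpers and frames the
 * output as PERF_RECORD_COMPRESSED records; the underlying one-shot libzstd
 * call looks like this (link with -lzstd). Level 1 matches the
 * comp_level_default used by this file; example_zstd_one_shot() is only an
 * illustration.
 */
#include <zstd.h>

static size_t example_zstd_one_shot(void *dst, size_t dst_size,
				    const void *src, size_t src_size)
{
	/* dst should hold at least ZSTD_compressBound(src_size) bytes. */
	size_t n = ZSTD_compress(dst, dst_size, src, src_size, 1);

	return ZSTD_isError(n) ? 0 : n;
}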
1452 
1453 static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1454                     bool overwrite, bool synch)
1455 {
1456     u64 bytes_written = rec->bytes_written;
1457     int i;
1458     int rc = 0;
1459     int nr_mmaps;
1460     struct mmap **maps;
1461     int trace_fd = rec->data.file.fd;
1462     off_t off = 0;
1463 
1464     if (!evlist)
1465         return 0;
1466 
1467     nr_mmaps = thread->nr_mmaps;
1468     maps = overwrite ? thread->overwrite_maps : thread->maps;
1469 
1470     if (!maps)
1471         return 0;
1472 
1473     if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1474         return 0;
1475 
1476     if (record__aio_enabled(rec))
1477         off = record__aio_get_pos(trace_fd);
1478 
1479     for (i = 0; i < nr_mmaps; i++) {
1480         u64 flush = 0;
1481         struct mmap *map = maps[i];
1482 
1483         if (map->core.base) {
1484             record__adjust_affinity(rec, map);
1485             if (synch) {
1486                 flush = map->core.flush;
1487                 map->core.flush = 1;
1488             }
1489             if (!record__aio_enabled(rec)) {
1490                 if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1491                     if (synch)
1492                         map->core.flush = flush;
1493                     rc = -1;
1494                     goto out;
1495                 }
1496             } else {
1497                 if (record__aio_push(rec, map, &off) < 0) {
1498                     record__aio_set_pos(trace_fd, off);
1499                     if (synch)
1500                         map->core.flush = flush;
1501                     rc = -1;
1502                     goto out;
1503                 }
1504             }
1505             if (synch)
1506                 map->core.flush = flush;
1507         }
1508 
1509         if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1510             !rec->opts.auxtrace_sample_mode &&
1511             record__auxtrace_mmap_read(rec, map) != 0) {
1512             rc = -1;
1513             goto out;
1514         }
1515     }
1516 
1517     if (record__aio_enabled(rec))
1518         record__aio_set_pos(trace_fd, off);
1519 
1520     /*
1521      * Mark the round finished in case we wrote
1522      * at least one event.
1523      *
1524      * No need for round events in directory mode,
1525      * because per-cpu maps and files have data
1526      * sorted by kernel.
1527      */
1528     if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
1529         rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1530 
1531     if (overwrite)
1532         evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1533 out:
1534     return rc;
1535 }
1536 
1537 static int record__mmap_read_all(struct record *rec, bool synch)
1538 {
1539     int err;
1540 
1541     err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1542     if (err)
1543         return err;
1544 
1545     return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1546 }
1547 
1548 static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
1549                        void *arg __maybe_unused)
1550 {
1551     struct perf_mmap *map = fda->priv[fd].ptr;
1552 
1553     if (map)
1554         perf_mmap__put(map);
1555 }
1556 
1557 static void *record__thread(void *arg)
1558 {
1559     enum thread_msg msg = THREAD_MSG__READY;
1560     bool terminate = false;
1561     struct fdarray *pollfd;
1562     int err, ctlfd_pos;
1563 
1564     thread = arg;
1565     thread->tid = gettid();
1566 
1567     err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1568     if (err == -1)
1569         pr_warning("threads[%d]: failed to notify on start: %s\n",
1570                thread->tid, strerror(errno));
1571 
1572     pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
1573 
1574     pollfd = &thread->pollfd;
1575     ctlfd_pos = thread->ctlfd_pos;
1576 
1577     for (;;) {
1578         unsigned long long hits = thread->samples;
1579 
1580         if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
1581             break;
1582 
1583         if (hits == thread->samples) {
1584 
1585             err = fdarray__poll(pollfd, -1);
1586             /*
1587              * Propagate error, only if there's any. Ignore positive
1588              * number of returned events and interrupt error.
1589              */
1590             if (err > 0 || (err < 0 && errno == EINTR))
1591                 err = 0;
1592             thread->waking++;
1593 
1594             if (fdarray__filter(pollfd, POLLERR | POLLHUP,
1595                         record__thread_munmap_filtered, NULL) == 0)
1596                 break;
1597         }
1598 
1599         if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
1600             terminate = true;
1601             close(thread->pipes.msg[0]);
1602             thread->pipes.msg[0] = -1;
1603             pollfd->entries[ctlfd_pos].fd = -1;
1604             pollfd->entries[ctlfd_pos].events = 0;
1605         }
1606 
1607         pollfd->entries[ctlfd_pos].revents = 0;
1608     }
1609     record__mmap_read_all(thread->rec, true);
1610 
1611     err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1612     if (err == -1)
1613         pr_warning("threads[%d]: failed to notify on termination: %s\n",
1614                thread->tid, strerror(errno));
1615 
1616     return NULL;
1617 }
1618 
1619 static void record__init_features(struct record *rec)
1620 {
1621     struct perf_session *session = rec->session;
1622     int feat;
1623 
1624     for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1625         perf_header__set_feat(&session->header, feat);
1626 
1627     if (rec->no_buildid)
1628         perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1629 
1630     if (!have_tracepoints(&rec->evlist->core.entries))
1631         perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1632 
1633     if (!rec->opts.branch_stack)
1634         perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1635 
1636     if (!rec->opts.full_auxtrace)
1637         perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1638 
1639     if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1640         perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1641 
1642     if (!rec->opts.use_clockid)
1643         perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
1644 
1645     if (!record__threads_enabled(rec))
1646         perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1647 
1648     if (!record__comp_enabled(rec))
1649         perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1650 
1651     perf_header__clear_feat(&session->header, HEADER_STAT);
1652 }
1653 
1654 static void
1655 record__finish_output(struct record *rec)
1656 {
1657     int i;
1658     struct perf_data *data = &rec->data;
1659     int fd = perf_data__fd(data);
1660 
1661     if (data->is_pipe)
1662         return;
1663 
1664     rec->session->header.data_size += rec->bytes_written;
1665     data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1666     if (record__threads_enabled(rec)) {
1667         for (i = 0; i < data->dir.nr; i++)
1668             data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
1669     }
1670 
1671     if (!rec->no_buildid) {
1672         process_buildids(rec);
1673 
1674         if (rec->buildid_all)
1675             dsos__hit_all(rec->session);
1676     }
1677     perf_session__write_header(rec->session, rec->evlist, fd, true);
1678 
1679     return;
1680 }
1681 
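     /*
      * Synthesize task (and, if requested, mmap) events for the forked
      * workload using a thread map that contains just its PID. The 'tail'
      * argument makes this a no-op unless it matches --tail-synthesize.
      */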
1682 static int record__synthesize_workload(struct record *rec, bool tail)
1683 {
1684     int err;
1685     struct perf_thread_map *thread_map;
1686     bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1687 
1688     if (rec->opts.tail_synthesize != tail)
1689         return 0;
1690 
1691     thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1692     if (thread_map == NULL)
1693         return -1;
1694 
1695     err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1696                          process_synthesized_event,
1697                          &rec->session->machines.host,
1698                          needs_mmap,
1699                          rec->opts.sample_address);
1700     perf_thread_map__put(thread_map);
1701     return err;
1702 }
1703 
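     /*
      * Emit the synthetic "finished init" marker event into the data
      * stream; like the other synthesizers, it honors --tail-synthesize
      * via the 'tail' argument.
      */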
1704 static int write_finished_init(struct record *rec, bool tail)
1705 {
1706     if (rec->opts.tail_synthesize != tail)
1707         return 0;
1708 
1709     return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
1710 }
1711 
1712 static int record__synthesize(struct record *rec, bool tail);
1713 
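     /*
      * Rotate the output: sync outstanding AIO, synthesize tail events,
      * finish the current perf.data and switch to a new timestamped file.
      * When a limited number of output files is configured, the oldest
      * file is removed and its slot reused.
      */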
1714 static int
1715 record__switch_output(struct record *rec, bool at_exit)
1716 {
1717     struct perf_data *data = &rec->data;
1718     int fd, err;
1719     char *new_filename;
1720 
1721     /* Same size as "2015122520103046" */
1722     char timestamp[] = "InvalidTimestamp";
1723 
1724     record__aio_mmap_read_sync(rec);
1725 
1726     write_finished_init(rec, true);
1727 
1728     record__synthesize(rec, true);
1729     if (target__none(&rec->opts.target))
1730         record__synthesize_workload(rec, true);
1731 
1732     rec->samples = 0;
1733     record__finish_output(rec);
1734     err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1735     if (err) {
1736         pr_err("Failed to get current timestamp\n");
1737         return -EINVAL;
1738     }
1739 
1740     fd = perf_data__switch(data, timestamp,
1741                     rec->session->header.data_offset,
1742                     at_exit, &new_filename);
1743     if (fd >= 0 && !at_exit) {
1744         rec->bytes_written = 0;
1745         rec->session->header.data_size = 0;
1746     }
1747 
1748     if (!quiet)
1749         fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1750             data->path, timestamp);
1751 
1752     if (rec->switch_output.num_files) {
1753         int n = rec->switch_output.cur_file + 1;
1754 
1755         if (n >= rec->switch_output.num_files)
1756             n = 0;
1757         rec->switch_output.cur_file = n;
1758         if (rec->switch_output.filenames[n]) {
1759             remove(rec->switch_output.filenames[n]);
1760             zfree(&rec->switch_output.filenames[n]);
1761         }
1762         rec->switch_output.filenames[n] = new_filename;
1763     } else {
1764         free(new_filename);
1765     }
1766 
1767     /* Output tracking events */
1768     if (!at_exit) {
1769         record__synthesize(rec, false);
1770 
1771         /*
1772          * In 'perf record --switch-output' without -a,
1773          * record__synthesize() in record__switch_output() won't
1774          * generate tracking events because there's no thread_map
1775          * in the evlist, which causes the newly created perf.data
1776          * to lack map and comm information.
1777          * Create a fake thread_map and directly call
1778          * perf_event__synthesize_thread_map() for those events.
1779          */
1780         if (target__none(&rec->opts.target))
1781             record__synthesize_workload(rec, false);
1782         write_finished_init(rec, false);
1783     }
1784     return fd;
1785 }
1786 
1787 static volatile int workload_exec_errno;
1788 
1789 /*
1790  * evlist__prepare_workload will send a SIGUSR1
1791  * if the fork fails, since we asked for it by setting its
1792  * want_signal to true.
1793  */
1794 static void workload_exec_failed_signal(int signo __maybe_unused,
1795                     siginfo_t *info,
1796                     void *ucontext __maybe_unused)
1797 {
1798     workload_exec_errno = info->si_value.sival_int;
1799     done = 1;
1800     child_finished = 1;
1801 }
1802 
1803 static void snapshot_sig_handler(int sig);
1804 static void alarm_sig_handler(int sig);
1805 
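     /*
      * Pick the control page of any mmapped ring buffer; record__synthesize()
      * reads the time conversion parameters (e.g. TSC) from it.
      */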
1806 static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
1807 {
1808     if (evlist) {
1809         if (evlist->mmap && evlist->mmap[0].core.base)
1810             return evlist->mmap[0].core.base;
1811         if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
1812             return evlist->overwrite_mmap[0].core.base;
1813     }
1814     return NULL;
1815 }
1816 
1817 static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1818 {
1819     const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
1820     if (pc)
1821         return pc;
1822     return NULL;
1823 }
1824 
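     /*
      * Synthesize the non-sample events that consumers need to make sense
      * of the data: time conversion, id index, auxtrace info, kernel and
      * module mmaps, guest machines, extra attributes, thread and CPU
      * maps, BPF and cgroup events, and the tasks of the target
      * (optionally using multiple threads).
      */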
1825 static int record__synthesize(struct record *rec, bool tail)
1826 {
1827     struct perf_session *session = rec->session;
1828     struct machine *machine = &session->machines.host;
1829     struct perf_data *data = &rec->data;
1830     struct record_opts *opts = &rec->opts;
1831     struct perf_tool *tool = &rec->tool;
1832     int err = 0;
1833     event_op f = process_synthesized_event;
1834 
1835     if (rec->opts.tail_synthesize != tail)
1836         return 0;
1837 
1838     if (data->is_pipe) {
1839         err = perf_event__synthesize_for_pipe(tool, session, data,
1840                               process_synthesized_event);
1841         if (err < 0)
1842             goto out;
1843 
1844         rec->bytes_written += err;
1845     }
1846 
1847     err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1848                       process_synthesized_event, machine);
1849     if (err)
1850         goto out;
1851 
1852     /* Synthesize id_index before auxtrace_info */
1853     err = perf_event__synthesize_id_index(tool,
1854                           process_synthesized_event,
1855                           session->evlist, machine);
1856     if (err)
1857         goto out;
1858 
1859     if (rec->opts.full_auxtrace) {
1860         err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1861                     session, process_synthesized_event);
1862         if (err)
1863             goto out;
1864     }
1865 
1866     if (!evlist__exclude_kernel(rec->evlist)) {
1867         err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1868                              machine);
1869         WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1870                    "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1871                    "Check /proc/kallsyms permission or run as root.\n");
1872 
1873         err = perf_event__synthesize_modules(tool, process_synthesized_event,
1874                              machine);
1875         WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1876                    "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1877                    "Check /proc/modules permission or run as root.\n");
1878     }
1879 
1880     if (perf_guest) {
1881         machines__process_guests(&session->machines,
1882                      perf_event__synthesize_guest_os, tool);
1883     }
1884 
1885     err = perf_event__synthesize_extra_attr(&rec->tool,
1886                         rec->evlist,
1887                         process_synthesized_event,
1888                         data->is_pipe);
1889     if (err)
1890         goto out;
1891 
1892     err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
1893                          process_synthesized_event,
1894                         NULL);
1895     if (err < 0) {
1896         pr_err("Couldn't synthesize thread map.\n");
1897         return err;
1898     }
1899 
1900     err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
1901                          process_synthesized_event, NULL);
1902     if (err < 0) {
1903         pr_err("Couldn't synthesize cpu map.\n");
1904         return err;
1905     }
1906 
1907     err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
1908                         machine, opts);
1909     if (err < 0) {
1910         pr_warning("Couldn't synthesize bpf events.\n");
1911         err = 0;
1912     }
1913 
1914     if (rec->opts.synth & PERF_SYNTH_CGROUP) {
1915         err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
1916                              machine);
1917         if (err < 0) {
1918             pr_warning("Couldn't synthesize cgroup events.\n");
1919             err = 0;
1920         }
1921     }
1922 
1923     if (rec->opts.nr_threads_synthesize > 1) {
1924         perf_set_multithreaded();
1925         f = process_locked_synthesized_event;
1926     }
1927 
1928     if (rec->opts.synth & PERF_SYNTH_TASK) {
1929         bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1930 
1931         err = __machine__synthesize_threads(machine, tool, &opts->target,
1932                             rec->evlist->core.threads,
1933                             f, needs_mmap, opts->sample_address,
1934                             rec->opts.nr_threads_synthesize);
1935     }
1936 
1937     if (rec->opts.nr_threads_synthesize > 1)
1938         perf_set_singlethreaded();
1939 
1940 out:
1941     return err;
1942 }
1943 
1944 static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
1945 {
1946     struct record *rec = data;
1947     pthread_kill(rec->thread_id, SIGUSR2);
1948     return 0;
1949 }
1950 
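     /*
      * Set up the side-band evlist: hook the --switch-output-event
      * callback that signals the main thread with SIGUSR2, add the BPF
      * side-band event when libbpf support is built in, and start the
      * side-band thread.
      */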
1951 static int record__setup_sb_evlist(struct record *rec)
1952 {
1953     struct record_opts *opts = &rec->opts;
1954 
1955     if (rec->sb_evlist != NULL) {
1956         /*
1957          * We get here if --switch-output-event populated the
1958          * sb_evlist, so associate a callback that will send a SIGUSR2
1959          * to the main thread.
1960          */
1961         evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
1962         rec->thread_id = pthread_self();
1963     }
1964 #ifdef HAVE_LIBBPF_SUPPORT
1965     if (!opts->no_bpf_event) {
1966         if (rec->sb_evlist == NULL) {
1967             rec->sb_evlist = evlist__new();
1968 
1969             if (rec->sb_evlist == NULL) {
1970                 pr_err("Couldn't create side band evlist.\n");
1971                 return -1;
1972             }
1973         }
1974 
1975         if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
1976             pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n");
1977             return -1;
1978         }
1979     }
1980 #endif
1981     if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
1982         pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
1983         opts->no_bpf_event = true;
1984     }
1985 
1986     return 0;
1987 }
1988 
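     /*
      * When --clockid is used, store the clockid and a reference pair of
      * timestamps (time of day and the selected clock) in the header so
      * analysis tools can convert between the two time bases.
      */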
1989 static int record__init_clock(struct record *rec)
1990 {
1991     struct perf_session *session = rec->session;
1992     struct timespec ref_clockid;
1993     struct timeval ref_tod;
1994     u64 ref;
1995 
1996     if (!rec->opts.use_clockid)
1997         return 0;
1998 
1999     if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
2000         session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;
2001 
2002     session->header.env.clock.clockid = rec->opts.clockid;
2003 
2004     if (gettimeofday(&ref_tod, NULL) != 0) {
2005         pr_err("gettimeofday failed, cannot set reference time.\n");
2006         return -1;
2007     }
2008 
2009     if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
2010         pr_err("clock_gettime failed, cannot set reference time.\n");
2011         return -1;
2012     }
2013 
2014     ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
2015           (u64) ref_tod.tv_usec * NSEC_PER_USEC;
2016 
2017     session->header.env.clock.tod_ns = ref;
2018 
2019     ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
2020           (u64) ref_clockid.tv_nsec;
2021 
2022     session->header.env.clock.clockid_ns = ref;
2023     return 0;
2024 }
2025 
2026 static void hit_auxtrace_snapshot_trigger(struct record *rec)
2027 {
2028     if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2029         trigger_hit(&auxtrace_snapshot_trigger);
2030         auxtrace_record__snapshot_started = 1;
2031         if (auxtrace_record__snapshot_start(rec->itr))
2032             trigger_error(&auxtrace_snapshot_trigger);
2033     }
2034 }
2035 
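     /*
      * On hybrid systems, rewrite plain event names as "pmu/event/" so
      * that events with the same name on different PMUs stay
      * distinguishable in the output.
      */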
2036 static void record__uniquify_name(struct record *rec)
2037 {
2038     struct evsel *pos;
2039     struct evlist *evlist = rec->evlist;
2040     char *new_name;
2041     int ret;
2042 
2043     if (!perf_pmu__has_hybrid())
2044         return;
2045 
2046     evlist__for_each_entry(evlist, pos) {
2047         if (!evsel__is_hybrid(pos))
2048             continue;
2049 
2050         if (strchr(pos->name, '/'))
2051             continue;
2052 
2053         ret = asprintf(&new_name, "%s/%s/",
2054                    pos->pmu_name, pos->name);
2055         if (ret >= 0) {
2056             free(pos->name);
2057             pos->name = new_name;
2058         }
2059     }
2060 }
2061 
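     /*
      * Ask a worker thread to stop by closing the write end of its
      * message pipe, then wait for the acknowledgement it sends back on
      * its ack pipe.
      */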
2062 static int record__terminate_thread(struct record_thread *thread_data)
2063 {
2064     int err;
2065     enum thread_msg ack = THREAD_MSG__UNDEFINED;
2066     pid_t tid = thread_data->tid;
2067 
2068     close(thread_data->pipes.msg[1]);
2069     thread_data->pipes.msg[1] = -1;
2070     err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
2071     if (err > 0)
2072         pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
2073     else
2074         pr_warning("threads[%d]: failed to receive termination notification from %d\n",
2075                thread->tid, tid);
2076 
2077     return 0;
2078 }
2079 
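     /*
      * Start the parallel streaming threads: block signals while they are
      * created, pin each thread (and finally the main thread) to its
      * affinity mask where supported, and wait for each thread's
      * readiness message.
      */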
2080 static int record__start_threads(struct record *rec)
2081 {
2082     int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
2083     struct record_thread *thread_data = rec->thread_data;
2084     sigset_t full, mask;
2085     pthread_t handle;
2086     pthread_attr_t attrs;
2087 
2088     thread = &thread_data[0];
2089 
2090     if (!record__threads_enabled(rec))
2091         return 0;
2092 
2093     sigfillset(&full);
2094     if (sigprocmask(SIG_SETMASK, &full, &mask)) {
2095         pr_err("Failed to block signals on threads start: %s\n", strerror(errno));
2096         return -1;
2097     }
2098 
2099     pthread_attr_init(&attrs);
2100     pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
2101 
2102     for (t = 1; t < nr_threads; t++) {
2103         enum thread_msg msg = THREAD_MSG__UNDEFINED;
2104 
2105 #ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
2106         pthread_attr_setaffinity_np(&attrs,
2107                         MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
2108                         (cpu_set_t *)(thread_data[t].mask->affinity.bits));
2109 #endif
2110         if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
2111             for (tt = 1; tt < t; tt++)
2112                 record__terminate_thread(&thread_data[tt]);
2113             pr_err("Failed to start threads: %s\n", strerror(errno));
2114             ret = -1;
2115             goto out_err;
2116         }
2117 
2118         err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
2119         if (err > 0)
2120             pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
2121                   thread_msg_tags[msg]);
2122         else
2123             pr_warning("threads[%d]: failed to receive start notification from %d\n",
2124                    thread->tid, rec->thread_data[t].tid);
2125     }
2126 
2127     sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
2128             (cpu_set_t *)thread->mask->affinity.bits);
2129 
2130     pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
2131 
2132 out_err:
2133     pthread_attr_destroy(&attrs);
2134 
2135     if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
2136         pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
2137         ret = -1;
2138     }
2139 
2140     return ret;
2141 }
2142 
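     /*
      * Terminate all worker threads and fold their per-thread statistics
      * (samples, wakeups, transferred/compressed/written bytes) into the
      * record and session totals.
      */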
2143 static int record__stop_threads(struct record *rec)
2144 {
2145     int t;
2146     struct record_thread *thread_data = rec->thread_data;
2147 
2148     for (t = 1; t < rec->nr_threads; t++)
2149         record__terminate_thread(&thread_data[t]);
2150 
2151     for (t = 0; t < rec->nr_threads; t++) {
2152         rec->samples += thread_data[t].samples;
2153         if (!record__threads_enabled(rec))
2154             continue;
2155         rec->session->bytes_transferred += thread_data[t].bytes_transferred;
2156         rec->session->bytes_compressed += thread_data[t].bytes_compressed;
2157         pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
2158              thread_data[t].samples, thread_data[t].waking);
2159         if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
2160             pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
2161                  thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
2162         else
2163             pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
2164     }
2165 
2166     return 0;
2167 }
2168 
2169 static unsigned long record__waking(struct record *rec)
2170 {
2171     int t;
2172     unsigned long waking = 0;
2173     struct record_thread *thread_data = rec->thread_data;
2174 
2175     for (t = 0; t < rec->nr_threads; t++)
2176         waking += thread_data[t].waking;
2177 
2178     return waking;
2179 }
2180 
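     /*
      * The core of 'perf record': create the session, open and mmap the
      * events, optionally fork the workload and start the streaming
      * threads, then loop reading the ring buffers until done, handling
      * control commands, output switching and auxtrace snapshots along
      * the way.
      */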
2181 static int __cmd_record(struct record *rec, int argc, const char **argv)
2182 {
2183     int err;
2184     int status = 0;
2185     const bool forks = argc > 0;
2186     struct perf_tool *tool = &rec->tool;
2187     struct record_opts *opts = &rec->opts;
2188     struct perf_data *data = &rec->data;
2189     struct perf_session *session;
2190     bool disabled = false, draining = false;
2191     int fd;
2192     float ratio = 0;
2193     enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
2194 
2195     atexit(record__sig_exit);
2196     signal(SIGCHLD, sig_handler);
2197     signal(SIGINT, sig_handler);
2198     signal(SIGTERM, sig_handler);
2199     signal(SIGSEGV, sigsegv_handler);
2200 
2201     if (rec->opts.record_namespaces)
2202         tool->namespace_events = true;
2203 
2204     if (rec->opts.record_cgroup) {
2205 #ifdef HAVE_FILE_HANDLE
2206         tool->cgroup_events = true;
2207 #else
2208         pr_err("cgroup tracking is not supported\n");
2209         return -1;
2210 #endif
2211     }
2212 
2213     if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
2214         signal(SIGUSR2, snapshot_sig_handler);
2215         if (rec->opts.auxtrace_snapshot_mode)
2216             trigger_on(&auxtrace_snapshot_trigger);
2217         if (rec->switch_output.enabled)
2218             trigger_on(&switch_output_trigger);
2219     } else {
2220         signal(SIGUSR2, SIG_IGN);
2221     }
2222 
2223     session = perf_session__new(data, tool);
2224     if (IS_ERR(session)) {
2225         pr_err("Perf session creation failed.\n");
2226         return PTR_ERR(session);
2227     }
2228 
2229     if (record__threads_enabled(rec)) {
2230         if (perf_data__is_pipe(&rec->data)) {
2231             pr_err("Parallel trace streaming is not available in pipe mode.\n");
2232             return -1;
2233         }
2234         if (rec->opts.full_auxtrace) {
2235             pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
2236             return -1;
2237         }
2238     }
2239 
2240     fd = perf_data__fd(data);
2241     rec->session = session;
2242 
2243     if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
2244         pr_err("Compression initialization failed.\n");
2245         return -1;
2246     }
2247 #ifdef HAVE_EVENTFD_SUPPORT
2248     done_fd = eventfd(0, EFD_NONBLOCK);
2249     if (done_fd < 0) {
2250         pr_err("Failed to create wakeup eventfd, error: %m\n");
2251         status = -1;
2252         goto out_delete_session;
2253     }
2254     err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
2255     if (err < 0) {
2256         pr_err("Failed to add wakeup eventfd to poll list\n");
2257         status = err;
2258         goto out_delete_session;
2259     }
2260 #endif // HAVE_EVENTFD_SUPPORT
2261 
2262     session->header.env.comp_type  = PERF_COMP_ZSTD;
2263     session->header.env.comp_level = rec->opts.comp_level;
2264 
2265     if (rec->opts.kcore &&
2266         !record__kcore_readable(&session->machines.host)) {
2267         pr_err("ERROR: kcore is not readable.\n");
2268         return -1;
2269     }
2270 
2271     if (record__init_clock(rec))
2272         return -1;
2273 
2274     record__init_features(rec);
2275 
2276     if (forks) {
2277         err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
2278                            workload_exec_failed_signal);
2279         if (err < 0) {
2280             pr_err("Couldn't run the workload!\n");
2281             status = err;
2282             goto out_delete_session;
2283         }
2284     }
2285 
2286     /*
2287      * If we have just a single event and are sending data
2288      * through pipe, we need to force the ids allocation,
2289      * because we synthesize event name through the pipe
2290      * and need the id for that.
2291      */
2292     if (data->is_pipe && rec->evlist->core.nr_entries == 1)
2293         rec->opts.sample_id = true;
2294 
2295     record__uniquify_name(rec);
2296 
2297     if (record__open(rec) != 0) {
2298         err = -1;
2299         goto out_free_threads;
2300     }
2301     session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
2302 
2303     if (rec->opts.kcore) {
2304         err = record__kcore_copy(&session->machines.host, data);
2305         if (err) {
2306             pr_err("ERROR: Failed to copy kcore\n");
2307             goto out_free_threads;
2308         }
2309     }
2310 
2311     err = bpf__apply_obj_config();
2312     if (err) {
2313         char errbuf[BUFSIZ];
2314 
2315         bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2316         pr_err("ERROR: Apply config to BPF failed: %s\n",
2317              errbuf);
2318         goto out_free_threads;
2319     }
2320 
2321     /*
2322      * Normally perf_session__new would do this, but it doesn't have the
2323      * evlist.
2324      */
2325     if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
2326         pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
2327         rec->tool.ordered_events = false;
2328     }
2329 
2330     if (!rec->evlist->core.nr_groups)
2331         perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
2332 
2333     if (data->is_pipe) {
2334         err = perf_header__write_pipe(fd);
2335         if (err < 0)
2336             goto out_free_threads;
2337     } else {
2338         err = perf_session__write_header(session, rec->evlist, fd, false);
2339         if (err < 0)
2340             goto out_free_threads;
2341     }
2342 
2343     err = -1;
2344     if (!rec->no_buildid
2345         && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
2346         pr_err("Couldn't generate buildids. "
2347                "Use --no-buildid to profile anyway.\n");
2348         goto out_free_threads;
2349     }
2350 
2351     err = record__setup_sb_evlist(rec);
2352     if (err)
2353         goto out_free_threads;
2354 
2355     err = record__synthesize(rec, false);
2356     if (err < 0)
2357         goto out_free_threads;
2358 
2359     if (rec->realtime_prio) {
2360         struct sched_param param;
2361 
2362         param.sched_priority = rec->realtime_prio;
2363         if (sched_setscheduler(0, SCHED_FIFO, &param)) {
2364             pr_err("Could not set realtime priority.\n");
2365             err = -1;
2366             goto out_free_threads;
2367         }
2368     }
2369 
2370     if (record__start_threads(rec))
2371         goto out_free_threads;
2372 
2373     /*
2374      * When perf is starting the traced process, all the events
2375      * (apart from group members) have enable_on_exec=1 set,
2376      * so don't spoil it by prematurely enabling them.
2377      */
2378     if (!target__none(&opts->target) && !opts->initial_delay)
2379         evlist__enable(rec->evlist);
2380 
2381     /*
2382      * Let the child rip
2383      */
2384     if (forks) {
2385         struct machine *machine = &session->machines.host;
2386         union perf_event *event;
2387         pid_t tgid;
2388 
2389         event = malloc(sizeof(event->comm) + machine->id_hdr_size);
2390         if (event == NULL) {
2391             err = -ENOMEM;
2392             goto out_child;
2393         }
2394 
2395         /*
2396          * Some H/W events are generated before the COMM event,
2397          * which is emitted during exec(), so perf script
2398          * cannot see a correct process name for those events.
2399          * Synthesize a COMM event to prevent it.
2400          */
2401         tgid = perf_event__synthesize_comm(tool, event,
2402                            rec->evlist->workload.pid,
2403                            process_synthesized_event,
2404                            machine);
2405         free(event);
2406 
2407         if (tgid == -1)
2408             goto out_child;
2409 
2410         event = malloc(sizeof(event->namespaces) +
2411                    (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
2412                    machine->id_hdr_size);
2413         if (event == NULL) {
2414             err = -ENOMEM;
2415             goto out_child;
2416         }
2417 
2418         /*
2419          * Synthesize a NAMESPACES event for the specified command.
2420          */
2421         perf_event__synthesize_namespaces(tool, event,
2422                           rec->evlist->workload.pid,
2423                           tgid, process_synthesized_event,
2424                           machine);
2425         free(event);
2426 
2427         evlist__start_workload(rec->evlist);
2428     }
2429 
2430     if (opts->initial_delay) {
2431         pr_info(EVLIST_DISABLED_MSG);
2432         if (opts->initial_delay > 0) {
2433             usleep(opts->initial_delay * USEC_PER_MSEC);
2434             evlist__enable(rec->evlist);
2435             pr_info(EVLIST_ENABLED_MSG);
2436         }
2437     }
2438 
2439     trigger_ready(&auxtrace_snapshot_trigger);
2440     trigger_ready(&switch_output_trigger);
2441     perf_hooks__invoke_record_start();
2442 
2443     /*
2444      * Must write FINISHED_INIT so it will be seen after all other
2445      * synthesized user events, but before any regular events.
2446      */
2447     err = write_finished_init(rec, false);
2448     if (err < 0)
2449         goto out_child;
2450 
2451     for (;;) {
2452         unsigned long long hits = thread->samples;
2453 
2454         /*
2455          * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
2456          * here: when done == true and hits != rec->samples
2457          * in the previous round.
2458          *
2459          * evlist__toggle_bkw_mmap() ensures we never convert
2460          * BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
2461          */
2462         if (trigger_is_hit(&switch_output_trigger) || done || draining)
2463             evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
2464 
2465         if (record__mmap_read_all(rec, false) < 0) {
2466             trigger_error(&auxtrace_snapshot_trigger);
2467             trigger_error(&switch_output_trigger);
2468             err = -1;
2469             goto out_child;
2470         }
2471 
2472         if (auxtrace_record__snapshot_started) {
2473             auxtrace_record__snapshot_started = 0;
2474             if (!trigger_is_error(&auxtrace_snapshot_trigger))
2475                 record__read_auxtrace_snapshot(rec, false);
2476             if (trigger_is_error(&auxtrace_snapshot_trigger)) {
2477                 pr_err("AUX area tracing snapshot failed\n");
2478                 err = -1;
2479                 goto out_child;
2480             }
2481         }
2482 
2483         if (trigger_is_hit(&switch_output_trigger)) {
2484             /*
2485              * If switch_output_trigger is hit, the data in the
2486              * overwritable ring buffer should have been collected,
2487              * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
2488              *
2489              * If SIGUSR2 is raised after or during record__mmap_read_all(),
2490              * record__mmap_read_all() didn't collect data from the
2491              * overwritable ring buffer. Read again.
2492              */
2493             if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
2494                 continue;
2495             trigger_ready(&switch_output_trigger);
2496 
2497             /*
2498              * Reenable events in overwrite ring buffer after
2499              * record__mmap_read_all(): we should have collected
2500              * data from it.
2501              */
2502             evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
2503 
2504             if (!quiet)
2505                 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
2506                     record__waking(rec));
2507             thread->waking = 0;
2508             fd = record__switch_output(rec, false);
2509             if (fd < 0) {
2510                 pr_err("Failed to switch to new file\n");
2511                 trigger_error(&switch_output_trigger);
2512                 err = fd;
2513                 goto out_child;
2514             }
2515 
2516             /* re-arm the alarm */
2517             if (rec->switch_output.time)
2518                 alarm(rec->switch_output.time);
2519         }
2520 
2521         if (hits == thread->samples) {
2522             if (done || draining)
2523                 break;
2524             err = fdarray__poll(&thread->pollfd, -1);
2525             /*
2526              * Propagate the error only if there is one. Ignore a positive
2527              * number of returned events and interrupted polls (EINTR).
2528              */
2529             if (err > 0 || (err < 0 && errno == EINTR))
2530                 err = 0;
2531             thread->waking++;
2532 
2533             if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
2534                         record__thread_munmap_filtered, NULL) == 0)
2535                 draining = true;
2536 
2537             evlist__ctlfd_update(rec->evlist,
2538                 &thread->pollfd.entries[thread->ctlfd_pos]);
2539         }
2540 
2541         if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
2542             switch (cmd) {
2543             case EVLIST_CTL_CMD_SNAPSHOT:
2544                 hit_auxtrace_snapshot_trigger(rec);
2545                 evlist__ctlfd_ack(rec->evlist);
2546                 break;
2547             case EVLIST_CTL_CMD_STOP:
2548                 done = 1;
2549                 break;
2550             case EVLIST_CTL_CMD_ACK:
2551             case EVLIST_CTL_CMD_UNSUPPORTED:
2552             case EVLIST_CTL_CMD_ENABLE:
2553             case EVLIST_CTL_CMD_DISABLE:
2554             case EVLIST_CTL_CMD_EVLIST:
2555             case EVLIST_CTL_CMD_PING:
2556             default:
2557                 break;
2558             }
2559         }
2560 
2561         /*
2562          * When perf is starting the traced process, at the end events
2563          * die with the process and we wait for that. Thus no need to
2564          * disable events in this case.
2565          */
2566         if (done && !disabled && !target__none(&opts->target)) {
2567             trigger_off(&auxtrace_snapshot_trigger);
2568             evlist__disable(rec->evlist);
2569             disabled = true;
2570         }
2571     }
2572 
2573     trigger_off(&auxtrace_snapshot_trigger);
2574     trigger_off(&switch_output_trigger);
2575 
2576     if (opts->auxtrace_snapshot_on_exit)
2577         record__auxtrace_snapshot_exit(rec);
2578 
2579     if (forks && workload_exec_errno) {
2580         char msg[STRERR_BUFSIZE], strevsels[2048];
2581         const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
2582 
2583         evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels);
2584 
2585         pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
2586             strevsels, argv[0], emsg);
2587         err = -1;
2588         goto out_child;
2589     }
2590 
2591     if (!quiet)
2592         fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
2593             record__waking(rec));
2594 
2595     write_finished_init(rec, true);
2596 
2597     if (target__none(&rec->opts.target))
2598         record__synthesize_workload(rec, true);
2599 
2600 out_child:
2601     record__stop_threads(rec);
2602     record__mmap_read_all(rec, true);
2603 out_free_threads:
2604     record__free_thread_data(rec);
2605     evlist__finalize_ctlfd(rec->evlist);
2606     record__aio_mmap_read_sync(rec);
2607 
2608     if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
2609         ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
2610         session->header.env.comp_ratio = ratio + 0.5;
2611     }
2612 
2613     if (forks) {
2614         int exit_status;
2615 
2616         if (!child_finished)
2617             kill(rec->evlist->workload.pid, SIGTERM);
2618 
2619         wait(&exit_status);
2620 
2621         if (err < 0)
2622             status = err;
2623         else if (WIFEXITED(exit_status))
2624             status = WEXITSTATUS(exit_status);
2625         else if (WIFSIGNALED(exit_status))
2626             signr = WTERMSIG(exit_status);
2627     } else
2628         status = err;
2629 
2630     if (rec->off_cpu)
2631         rec->bytes_written += off_cpu_write(rec->session);
2632 
2633     record__synthesize(rec, true);
2634     /* this will be recalculated during process_buildids() */
2635     rec->samples = 0;
2636 
2637     if (!err) {
2638         if (!rec->timestamp_filename) {
2639             record__finish_output(rec);
2640         } else {
2641             fd = record__switch_output(rec, true);
2642             if (fd < 0) {
2643                 status = fd;
2644                 goto out_delete_session;
2645             }
2646         }
2647     }
2648 
2649     perf_hooks__invoke_record_end();
2650 
2651     if (!err && !quiet) {
2652         char samples[128];
2653         const char *postfix = rec->timestamp_filename ?
2654                     ".<timestamp>" : "";
2655 
2656         if (rec->samples && !rec->opts.full_auxtrace)
2657             scnprintf(samples, sizeof(samples),
2658                   " (%" PRIu64 " samples)", rec->samples);
2659         else
2660             samples[0] = '\0';
2661 
2662         fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
2663             perf_data__size(data) / 1024.0 / 1024.0,
2664             data->path, postfix, samples);
2665         if (ratio) {
2666             fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
2667                     rec->session->bytes_transferred / 1024.0 / 1024.0,
2668                     ratio);
2669         }
2670         fprintf(stderr, " ]\n");
2671     }
2672 
2673 out_delete_session:
2674 #ifdef HAVE_EVENTFD_SUPPORT
2675     if (done_fd >= 0)
2676         close(done_fd);
2677 #endif
2678     zstd_fini(&session->zstd_data);
2679     perf_session__delete(session);
2680 
2681     if (!opts->no_bpf_event)
2682         evlist__stop_sb_thread(rec->sb_evlist);
2683     return status;
2684 }
2685 
2686 static void callchain_debug(struct callchain_param *callchain)
2687 {
2688     static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2689 
2690     pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2691 
2692     if (callchain->record_mode == CALLCHAIN_DWARF)
2693         pr_debug("callchain: stack dump size %d\n",
2694              callchain->dump_size);
2695 }
2696 
2697 int record_opts__parse_callchain(struct record_opts *record,
2698                  struct callchain_param *callchain,
2699                  const char *arg, bool unset)
2700 {
2701     int ret;
2702     callchain->enabled = !unset;
2703 
2704     /* --no-call-graph */
2705     if (unset) {
2706         callchain->record_mode = CALLCHAIN_NONE;
2707         pr_debug("callchain: disabled\n");
2708         return 0;
2709     }
2710 
2711     ret = parse_callchain_record_opt(arg, callchain);
2712     if (!ret) {
2713         /* Enable data address sampling for DWARF unwind. */
2714         if (callchain->record_mode == CALLCHAIN_DWARF)
2715             record->sample_address = true;
2716         callchain_debug(callchain);
2717     }
2718 
2719     return ret;
2720 }
2721 
2722 int record_parse_callchain_opt(const struct option *opt,
2723                    const char *arg,
2724                    int unset)
2725 {
2726     return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
2727 }
2728 
2729 int record_callchain_opt(const struct option *opt,
2730              const char *arg __maybe_unused,
2731              int unset __maybe_unused)
2732 {
2733     struct callchain_param *callchain = opt->value;
2734 
2735     callchain->enabled = true;
2736 
2737     if (callchain->record_mode == CALLCHAIN_NONE)
2738         callchain->record_mode = CALLCHAIN_FP;
2739 
2740     callchain_debug(callchain);
2741     return 0;
2742 }
2743 
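     /*
      * Handle 'record.*' keys from the perf config file: build-id
      * handling, call-graph record mode, AIO block count and debuginfod
      * URLs.
      */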
2744 static int perf_record_config(const char *var, const char *value, void *cb)
2745 {
2746     struct record *rec = cb;
2747 
2748     if (!strcmp(var, "record.build-id")) {
2749         if (!strcmp(value, "cache"))
2750             rec->no_buildid_cache = false;
2751         else if (!strcmp(value, "no-cache"))
2752             rec->no_buildid_cache = true;
2753         else if (!strcmp(value, "skip"))
2754             rec->no_buildid = true;
2755         else if (!strcmp(value, "mmap"))
2756             rec->buildid_mmap = true;
2757         else
2758             return -1;
2759         return 0;
2760     }
2761     if (!strcmp(var, "record.call-graph")) {
2762         var = "call-graph.record-mode";
2763         return perf_default_config(var, value, cb);
2764     }
2765 #ifdef HAVE_AIO_SUPPORT
2766     if (!strcmp(var, "record.aio")) {
2767         rec->opts.nr_cblocks = strtol(value, NULL, 0);
2768         if (!rec->opts.nr_cblocks)
2769             rec->opts.nr_cblocks = nr_cblocks_default;
2770     }
2771 #endif
2772     if (!strcmp(var, "record.debuginfod")) {
2773         rec->debuginfod.urls = strdup(value);
2774         if (!rec->debuginfod.urls)
2775             return -ENOMEM;
2776         rec->debuginfod.set = true;
2777     }
2778 
2779     return 0;
2780 }
2781 
2782 
2783 static int record__parse_affinity(const struct option *opt, const char *str, int unset)
2784 {
2785     struct record_opts *opts = (struct record_opts *)opt->value;
2786 
2787     if (unset || !str)
2788         return 0;
2789 
2790     if (!strcasecmp(str, "node"))
2791         opts->affinity = PERF_AFFINITY_NODE;
2792     else if (!strcasecmp(str, "cpu"))
2793         opts->affinity = PERF_AFFINITY_CPU;
2794 
2795     return 0;
2796 }
2797 
2798 static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
2799 {
2800     mask->nbits = nr_bits;
2801     mask->bits = bitmap_zalloc(mask->nbits);
2802     if (!mask->bits)
2803         return -ENOMEM;
2804 
2805     return 0;
2806 }
2807 
2808 static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
2809 {
2810     bitmap_free(mask->bits);
2811     mask->nbits = 0;
2812 }
2813 
2814 static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
2815 {
2816     int ret;
2817 
2818     ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
2819     if (ret) {
2820         mask->affinity.bits = NULL;
2821         return ret;
2822     }
2823 
2824     ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
2825     if (ret) {
2826         record__mmap_cpu_mask_free(&mask->maps);
2827         mask->maps.bits = NULL;
2828     }
2829 
2830     return ret;
2831 }
2832 
2833 static void record__thread_mask_free(struct thread_mask *mask)
2834 {
2835     record__mmap_cpu_mask_free(&mask->maps);
2836     record__mmap_cpu_mask_free(&mask->affinity);
2837 }
2838 
2839 static int record__parse_threads(const struct option *opt, const char *str, int unset)
2840 {
2841     int s;
2842     struct record_opts *opts = opt->value;
2843 
2844     if (unset || !str || !strlen(str)) {
2845         opts->threads_spec = THREAD_SPEC__CPU;
2846     } else {
2847         for (s = 1; s < THREAD_SPEC__MAX; s++) {
2848             if (s == THREAD_SPEC__USER) {
2849                 opts->threads_user_spec = strdup(str);
2850                 if (!opts->threads_user_spec)
2851                     return -ENOMEM;
2852                 opts->threads_spec = THREAD_SPEC__USER;
2853                 break;
2854             }
2855             if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
2856                 opts->threads_spec = s;
2857                 break;
2858             }
2859         }
2860     }
2861 
2862     if (opts->threads_spec == THREAD_SPEC__USER)
2863         pr_debug("threads_spec: %s\n", opts->threads_user_spec);
2864     else
2865         pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
2866 
2867     return 0;
2868 }
2869 
2870 static int parse_output_max_size(const struct option *opt,
2871                  const char *str, int unset)
2872 {
2873     unsigned long *s = (unsigned long *)opt->value;
2874     static struct parse_tag tags_size[] = {
2875         { .tag  = 'B', .mult = 1       },
2876         { .tag  = 'K', .mult = 1 << 10 },
2877         { .tag  = 'M', .mult = 1 << 20 },
2878         { .tag  = 'G', .mult = 1 << 30 },
2879         { .tag  = 0 },
2880     };
2881     unsigned long val;
2882 
2883     if (unset) {
2884         *s = 0;
2885         return 0;
2886     }
2887 
2888     val = parse_tag_value(str, tags_size);
2889     if (val != (unsigned long) -1) {
2890         *s = val;
2891         return 0;
2892     }
2893 
2894     return -1;
2895 }
2896 
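     /*
      * Parse -m/--mmap-pages as "pages[,pages]": the value before the
      * comma sizes the data mmaps, the optional value after it sizes the
      * AUX area tracing mmaps.
      */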
2897 static int record__parse_mmap_pages(const struct option *opt,
2898                     const char *str,
2899                     int unset __maybe_unused)
2900 {
2901     struct record_opts *opts = opt->value;
2902     char *s, *p;
2903     unsigned int mmap_pages;
2904     int ret;
2905 
2906     if (!str)
2907         return -EINVAL;
2908 
2909     s = strdup(str);
2910     if (!s)
2911         return -ENOMEM;
2912 
2913     p = strchr(s, ',');
2914     if (p)
2915         *p = '\0';
2916 
2917     if (*s) {
2918         ret = __evlist__parse_mmap_pages(&mmap_pages, s);
2919         if (ret)
2920             goto out_free;
2921         opts->mmap_pages = mmap_pages;
2922     }
2923 
2924     if (!p) {
2925         ret = 0;
2926         goto out_free;
2927     }
2928 
2929     ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
2930     if (ret)
2931         goto out_free;
2932 
2933     opts->auxtrace_mmap_pages = mmap_pages;
2934 
2935 out_free:
2936     free(s);
2937     return ret;
2938 }
2939 
2940 void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
2941 {
2942 }
2943 
2944 static int parse_control_option(const struct option *opt,
2945                 const char *str,
2946                 int unset __maybe_unused)
2947 {
2948     struct record_opts *opts = opt->value;
2949 
2950     return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
2951 }
2952 
2953 static void switch_output_size_warn(struct record *rec)
2954 {
2955     u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
2956     struct switch_output *s = &rec->switch_output;
2957 
2958     wakeup_size /= 2;
2959 
2960     if (s->size < wakeup_size) {
2961         char buf[100];
2962 
2963         unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
2964         pr_warning("WARNING: switch-output data size lower than "
2965                "wakeup kernel buffer size (%s), "
2966                "expect bigger perf.data sizes\n", buf);
2967     }
2968 }
2969 
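     /*
      * Parse the --switch-output argument: "signal", a size threshold
      * (B/K/M/G) or a time threshold (s/m/h/d). Not available in parallel
      * streaming mode; enabling it implies timestamped output file names.
      */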
2970 static int switch_output_setup(struct record *rec)
2971 {
2972     struct switch_output *s = &rec->switch_output;
2973     static struct parse_tag tags_size[] = {
2974         { .tag  = 'B', .mult = 1       },
2975         { .tag  = 'K', .mult = 1 << 10 },
2976         { .tag  = 'M', .mult = 1 << 20 },
2977         { .tag  = 'G', .mult = 1 << 30 },
2978         { .tag  = 0 },
2979     };
2980     static struct parse_tag tags_time[] = {
2981         { .tag  = 's', .mult = 1        },
2982         { .tag  = 'm', .mult = 60       },
2983         { .tag  = 'h', .mult = 60*60    },
2984         { .tag  = 'd', .mult = 60*60*24 },
2985         { .tag  = 0 },
2986     };
2987     unsigned long val;
2988 
2989     /*
2990      * If we're using --switch-output-event, then we imply
2991      * --switch-output=signal, as we'll send a SIGUSR2 from the side band
2992      * thread to its parent.
2993      */
2994     if (rec->switch_output_event_set) {
2995         if (record__threads_enabled(rec)) {
2996             pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
2997             return 0;
2998         }
2999         goto do_signal;
3000     }
3001 
3002     if (!s->set)
3003         return 0;
3004 
3005     if (record__threads_enabled(rec)) {
3006         pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
3007         return 0;
3008     }
3009 
3010     if (!strcmp(s->str, "signal")) {
3011 do_signal:
3012         s->signal = true;
3013         pr_debug("switch-output with SIGUSR2 signal\n");
3014         goto enabled;
3015     }
3016 
3017     val = parse_tag_value(s->str, tags_size);
3018     if (val != (unsigned long) -1) {
3019         s->size = val;
3020         pr_debug("switch-output with %s size threshold\n", s->str);
3021         goto enabled;
3022     }
3023 
3024     val = parse_tag_value(s->str, tags_time);
3025     if (val != (unsigned long) -1) {
3026         s->time = val;
3027         pr_debug("switch-output with %s time threshold (%lu seconds)\n",
3028              s->str, s->time);
3029         goto enabled;
3030     }
3031 
3032     return -1;
3033 
3034 enabled:
3035     rec->timestamp_filename = true;
3036     s->enabled              = true;
3037 
3038     if (s->size && !rec->opts.no_buffering)
3039         switch_output_size_warn(rec);
3040 
3041     return 0;
3042 }
3043 
3044 static const char * const __record_usage[] = {
3045     "perf record [<options>] [<command>]",
3046     "perf record [<options>] -- <command> [<options>]",
3047     NULL
3048 };
3049 const char * const *record_usage = __record_usage;
3050 
3051 static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
3052                   struct perf_sample *sample, struct machine *machine)
3053 {
3054     /*
3055      * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
3056      * so there is no need to add them twice.
3057      */
3058     if (!(event->header.misc & PERF_RECORD_MISC_USER))
3059         return 0;
3060     return perf_event__process_mmap(tool, event, sample, machine);
3061 }
3062 
3063 static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
3064                    struct perf_sample *sample, struct machine *machine)
3065 {
3066     /*
3067      * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
3068      * so there is no need to add them twice.
3069      */
3070     if (!(event->header.misc & PERF_RECORD_MISC_USER))
3071         return 0;
3072 
3073     return perf_event__process_mmap2(tool, event, sample, machine);
3074 }
3075 
3076 static int process_timestamp_boundary(struct perf_tool *tool,
3077                       union perf_event *event __maybe_unused,
3078                       struct perf_sample *sample,
3079                       struct machine *machine __maybe_unused)
3080 {
3081     struct record *rec = container_of(tool, struct record, tool);
3082 
3083     set_timestamp_boundary(rec, sample->time);
3084     return 0;
3085 }
3086 
3087 static int parse_record_synth_option(const struct option *opt,
3088                      const char *str,
3089                      int unset __maybe_unused)
3090 {
3091     struct record_opts *opts = opt->value;
3092     char *p = strdup(str);
3093 
3094     if (p == NULL)
3095         return -1;
3096 
3097     opts->synth = parse_synth_opt(p);
3098     free(p);
3099 
3100     if (opts->synth < 0) {
3101         pr_err("Invalid synth option: %s\n", str);
3102         return -1;
3103     }
3104     return 0;
3105 }
3106 
3107 /*
3108  * XXX Ideally this would be local to cmd_record() and passed to a record__new,
3109  * because we need to have access to it in record__exit, which is called
3110  * after cmd_record() exits, but since record_options needs to be accessible to
3111  * builtin-script, leave it here.
3112  *
3113  * At least we don't touch it in all the other functions here directly.
3114  *
3115  * Just say no to tons of global variables, sigh.
3116  */
3117 static struct record record = {
3118     .opts = {
3119         .sample_time         = true,
3120         .mmap_pages      = UINT_MAX,
3121         .user_freq       = UINT_MAX,
3122         .user_interval       = ULLONG_MAX,
3123         .freq            = 4000,
3124         .target          = {
3125             .uses_mmap   = true,
3126             .default_per_cpu = true,
3127         },
3128         .mmap_flush          = MMAP_FLUSH_DEFAULT,
3129         .nr_threads_synthesize = 1,
3130         .ctl_fd              = -1,
3131         .ctl_fd_ack          = -1,
3132         .synth               = PERF_SYNTH_ALL,
3133     },
3134     .tool = {
3135         .sample     = process_sample_event,
3136         .fork       = perf_event__process_fork,
3137         .exit       = perf_event__process_exit,
3138         .comm       = perf_event__process_comm,
3139         .namespaces = perf_event__process_namespaces,
3140         .mmap       = build_id__process_mmap,
3141         .mmap2      = build_id__process_mmap2,
3142         .itrace_start   = process_timestamp_boundary,
3143         .aux        = process_timestamp_boundary,
3144         .ordered_events = true,
3145     },
3146 };
3147 
3148 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
3149     "\n\t\t\t\tDefault: fp";
3150 
3151 static bool dry_run;
3152 
3153 /*
3154  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
3155  * with it and switch to using the library functions in perf_evlist that came
3156  * from builtin-record.c, i.e. use record_opts,
3157  * evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record',
3158  * using pipes, etc.
3159  */
3160 static struct option __record_options[] = {
3161     OPT_CALLBACK('e', "event", &record.evlist, "event",
3162              "event selector. use 'perf list' to list available events",
3163              parse_events_option),
3164     OPT_CALLBACK(0, "filter", &record.evlist, "filter",
3165              "event filter", parse_filter),
3166     OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
3167                NULL, "don't record events from perf itself",
3168                exclude_perf),
3169     OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
3170             "record events on existing process id"),
3171     OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
3172             "record events on existing thread id"),
3173     OPT_INTEGER('r', "realtime", &record.realtime_prio,
3174             "collect data with this RT SCHED_FIFO priority"),
3175     OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
3176             "collect data without buffering"),
3177     OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
3178             "collect raw sample records from all opened counters"),
3179     OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
3180                 "system-wide collection from all CPUs"),
3181     OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
3182             "list of cpus to monitor"),
3183     OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
3184     OPT_STRING('o', "output", &record.data.path, "file",
3185             "output file name"),
3186     OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
3187             &record.opts.no_inherit_set,
3188             "child tasks do not inherit counters"),
3189     OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
3190             "synthesize non-sample events at the end of output"),
3191     OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
3192     OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
3193     OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
3194             "Fail if the specified frequency can't be used"),
3195     OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
3196              "profile at this frequency",
3197               record__parse_freq),
3198     OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
3199              "number of mmap data pages and AUX area tracing mmap pages",
3200              record__parse_mmap_pages),
3201     OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
3202              "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
3203              record__mmap_flush_parse),
3204     OPT_BOOLEAN(0, "group", &record.opts.group,
3205             "put the counters into a counter group"),
3206     OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
3207                NULL, "enables call-graph recording" ,
3208                &record_callchain_opt),
3209     OPT_CALLBACK(0, "call-graph", &record.opts,
3210              "record_mode[,record_size]", record_callchain_help,
3211              &record_parse_callchain_opt),
3212     OPT_INCR('v', "verbose", &verbose,
3213             "be more verbose (show counter open errors, etc)"),
3214     OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
3215     OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
3216             "per thread counts"),
3217     OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3218     OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
3219             "Record the sample physical addresses"),
3220     OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
3221             "Record the sampled data address data page size"),
3222     OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
3223             "Record the sampled code address (ip) page size"),
3224     OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3225     OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier,
3226             "Record the sample identifier"),
3227     OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
3228             &record.opts.sample_time_set,
3229             "Record the sample timestamps"),
3230     OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
3231             "Record the sample period"),
3232     OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
3233             "don't sample"),
3234     OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
3235             &record.no_buildid_cache_set,
3236             "do not update the buildid cache"),
3237     OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
3238             &record.no_buildid_set,
3239             "do not collect buildids in perf.data"),
3240     OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
3241              "monitor event in cgroup name only",
3242              parse_cgroups),
3243     OPT_INTEGER('D', "delay", &record.opts.initial_delay,
3244           "ms to wait before starting measurement after program start (-1: start with events disabled)"),
3245     OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
3246     OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
3247            "user to profile"),
3248 
3249     OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
3250              "branch any", "sample any taken branches",
3251              parse_branch_stack),
3252 
3253     OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
3254              "branch filter mask", "branch stack filter modes",
3255              parse_branch_stack),
3256     OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
3257             "sample by weight (on special events only)"),
3258     OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
3259             "sample transaction flags (special events only)"),
3260     OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
3261             "use per-thread mmaps"),
3262     OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
3263             "sample selected machine registers on interrupt,"
3264             " use '-I?' to list register names", parse_intr_regs),
3265     OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
3266             "sample selected machine registers on interrupt,"
3267             " use '--user-regs=?' to list register names", parse_user_regs),
3268     OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
3269             "Record running/enabled time of read (:S) events"),
3270     OPT_CALLBACK('k', "clockid", &record.opts,
3271     "clockid", "clockid to use for events, see clock_gettime()",
3272     parse_clockid),
3273     OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
3274               "opts", "AUX area tracing Snapshot Mode", ""),
3275     OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
3276               "opts", "sample AUX area", ""),
3277     OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
3278             "per thread proc mmap processing timeout in ms"),
3279     OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
3280             "Record namespaces events"),
3281     OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
3282             "Record cgroup events"),
3283     OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
3284             &record.opts.record_switch_events_set,
3285             "Record context switch events"),
3286     OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
3287              "Configure all used events to run in kernel space.",
3288              PARSE_OPT_EXCLUSIVE),
3289     OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
3290              "Configure all used events to run in user space.",
3291              PARSE_OPT_EXCLUSIVE),
3292     OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
3293             "collect kernel callchains"),
3294     OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
3295             "collect user callchains"),
3296     OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
3297            "clang binary to use for compiling BPF scriptlets"),
3298     OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
3299            "options passed to clang when compiling BPF scriptlets"),
3300     OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
3301            "file", "vmlinux pathname"),
3302     OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
3303             "Record build-id of all DSOs regardless of hits"),
3304     OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap,
3305             "Record build-id in map events"),
3306     OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
3307             "append timestamp to output filename"),
3308     OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
3309             "Record timestamp boundary (time of first/last samples)"),
3310     OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
3311               &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
3312               "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
3313               "signal"),
3314     OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event",
3315              "switch output event selector. use 'perf list' to list available events",
3316              parse_events_option_new_evlist),
3317     OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
3318            "Limit number of switch output generated files"),
3319     OPT_BOOLEAN(0, "dry-run", &dry_run,
3320             "Parse options then exit"),
3321 #ifdef HAVE_AIO_SUPPORT
3322     OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
3323              &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
3324              record__aio_parse),
3325 #endif
3326     OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
3327              "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
3328              record__parse_affinity),
3329 #ifdef HAVE_ZSTD_SUPPORT
3330     OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
3331                 "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
3332                 record__parse_comp_level),
3333 #endif
3334     OPT_CALLBACK(0, "max-size", &record.output_max_size,
3335              "size", "Limit the maximum size of the output file", parse_output_max_size),
3336     OPT_UINTEGER(0, "num-thread-synthesize",
3337              &record.opts.nr_threads_synthesize,
3338              "number of threads to run for event synthesis"),
3339 #ifdef HAVE_LIBPFM
3340     OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
3341         "libpfm4 event selector. use 'perf list' to list available events",
3342         parse_libpfm_events_option),
3343 #endif
3344     OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
3345              "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
3346              "\t\t\t  'snapshot': AUX area tracing snapshot).\n"
3347              "\t\t\t  Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
3348              "\t\t\t  Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
3349               parse_control_option),
3350     OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
3351              "Fine-tune event synthesis: default=all", parse_record_synth_option),
3352     OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
3353               &record.debuginfod.set, "debuginfod urls",
3354               "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
3355               "system"),
3356     OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
3357                 "write collected trace data into several data files using parallel threads",
3358                 record__parse_threads),
3359     OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
3360     OPT_END()
3361 };
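
     /*
      * Illustrative command lines exercising some of the options above
      * (examples only, not an exhaustive reference; the workload name is
      * a placeholder):
      *
      *   perf record -F 999 -g -- ./workload      # frequency + call graphs
      *   perf record --threads=numa -a sleep 1    # parallel trace streaming
      *   perf record --switch-output=1m --switch-max-files=5 -a
      */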
3362 
3363 struct option *record_options = __record_options;
3364 
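     /*
      * Set a bit in @mask for every CPU in @cpus.  A dummy CPU map (as used
      * for per-thread monitoring) leaves the mask untouched; a CPU index
      * beyond the mask size is reported as -ENODEV.
      */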
3365 static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3366 {
3367     struct perf_cpu cpu;
3368     int idx;
3369 
3370     if (cpu_map__is_dummy(cpus))
3371         return 0;
3372 
3373     perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
3374         if (cpu.cpu == -1)
3375             continue;
3376         /* Return -ENODEV if the input cpu is greater than the max cpu */
3377         if ((unsigned long)cpu.cpu > mask->nbits)
3378             return -ENODEV;
3379         set_bit(cpu.cpu, mask->bits);
3380     }
3381 
3382     return 0;
3383 }
3384 
3385 static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3386 {
3387     int ret;
3388     struct perf_cpu_map *cpus;
3389 
3390     cpus = perf_cpu_map__new(mask_spec);
3391     if (!cpus)
3392         return -ENOMEM;
3393 
3394     bitmap_zero(mask->bits, mask->nbits);
3395     ret = record__mmap_cpu_mask_init(mask, cpus);
3396     perf_cpu_map__put(cpus);
3397 
3398     return ret;
3399 }
3401 
3402 static void record__free_thread_masks(struct record *rec, int nr_threads)
3403 {
3404     int t;
3405 
3406     if (rec->thread_masks)
3407         for (t = 0; t < nr_threads; t++)
3408             record__thread_mask_free(&rec->thread_masks[t]);
3409 
3410     zfree(&rec->thread_masks);
3411 }
3412 
3413 static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
3414 {
3415     int t, ret;
3416 
3417     rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
3418     if (!rec->thread_masks) {
3419         pr_err("Failed to allocate thread masks\n");
3420         return -ENOMEM;
3421     }
3422 
3423     for (t = 0; t < nr_threads; t++) {
3424         ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
3425         if (ret) {
3426             pr_err("Failed to allocate thread masks[%d]\n", t);
3427             goto out_free;
3428         }
3429     }
3430 
3431     return 0;
3432 
3433 out_free:
3434     record__free_thread_masks(rec, nr_threads);
3435 
3436     return ret;
3437 }
3438 
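     /*
      * --threads=cpu: one data streaming thread per monitored CPU, with both
      * the maps and the affinity mask of each thread containing just that CPU.
      */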
3439 static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
3440 {
3441     int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
3442 
3443     ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
3444     if (ret)
3445         return ret;
3446 
3447     rec->nr_threads = nr_cpus;
3448     pr_debug("nr_threads: %d\n", rec->nr_threads);
3449 
3450     for (t = 0; t < rec->nr_threads; t++) {
3451         set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
3452         set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
3453         if (verbose) {
3454             pr_debug("thread_masks[%d]: ", t);
3455             mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3456             pr_debug("thread_masks[%d]: ", t);
3457             mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3458         }
3459     }
3460 
3461     return 0;
3462 }
3463 
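     /*
      * Build one thread_mask per maps/affinity spec pair.  Each parsed mask
      * is intersected with the set of monitored CPUs; empty results and
      * overlaps with previously accepted masks are rejected with -EINVAL.
      */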
3464 static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
3465                       const char **maps_spec, const char **affinity_spec,
3466                       u32 nr_spec)
3467 {
3468     u32 s;
3469     int ret = 0, t = 0;
3470     struct mmap_cpu_mask cpus_mask;
3471     struct thread_mask thread_mask, full_mask, *thread_masks;
3472 
3473     ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
3474     if (ret) {
3475         pr_err("Failed to allocate CPUs mask\n");
3476         return ret;
3477     }
3478 
3479     ret = record__mmap_cpu_mask_init(&cpus_mask, cpus);
3480     if (ret) {
3481         pr_err("Failed to init cpu mask\n");
3482         goto out_free_cpu_mask;
3483     }
3484 
3485     ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
3486     if (ret) {
3487         pr_err("Failed to allocate full mask\n");
3488         goto out_free_cpu_mask;
3489     }
3490 
3491     ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3492     if (ret) {
3493         pr_err("Failed to allocate thread mask\n");
3494         goto out_free_full_and_cpu_masks;
3495     }
3496 
3497     for (s = 0; s < nr_spec; s++) {
3498         ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
3499         if (ret) {
3500             pr_err("Failed to initialize maps thread mask\n");
3501             goto out_free;
3502         }
3503         ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
3504         if (ret) {
3505             pr_err("Failed to initialize affinity thread mask\n");
3506             goto out_free;
3507         }
3508 
3509         /* ignore invalid CPUs but do not allow empty masks */
3510         if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
3511                 cpus_mask.bits, thread_mask.maps.nbits)) {
3512             pr_err("Empty maps mask: %s\n", maps_spec[s]);
3513             ret = -EINVAL;
3514             goto out_free;
3515         }
3516         if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
3517                 cpus_mask.bits, thread_mask.affinity.nbits)) {
3518             pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
3519             ret = -EINVAL;
3520             goto out_free;
3521         }
3522 
3523         /* do not allow intersection with other masks (full_mask) */
3524         if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
3525                       thread_mask.maps.nbits)) {
3526             pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
3527             ret = -EINVAL;
3528             goto out_free;
3529         }
3530         if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
3531                       thread_mask.affinity.nbits)) {
3532             pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
3533             ret = -EINVAL;
3534             goto out_free;
3535         }
3536 
3537         bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
3538               thread_mask.maps.bits, full_mask.maps.nbits);
3539         bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
3540               thread_mask.affinity.bits, full_mask.affinity.nbits);
3541 
3542         thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
3543         if (!thread_masks) {
3544             pr_err("Failed to reallocate thread masks\n");
3545             ret = -ENOMEM;
3546             goto out_free;
3547         }
3548         rec->thread_masks = thread_masks;
3549         rec->thread_masks[t] = thread_mask;
3550         if (verbose) {
3551             pr_debug("thread_masks[%d]: ", t);
3552             mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3553             pr_debug("thread_masks[%d]: ", t);
3554             mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3555         }
3556         t++;
3557         ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
3558         if (ret) {
3559             pr_err("Failed to allocate thread mask\n");
3560             goto out_free_full_and_cpu_masks;
3561         }
3562     }
3563     rec->nr_threads = t;
3564     pr_debug("nr_threads: %d\n", rec->nr_threads);
3565     if (!rec->nr_threads)
3566         ret = -EINVAL;
3567 
3568 out_free:
3569     record__thread_mask_free(&thread_mask);
3570 out_free_full_and_cpu_masks:
3571     record__thread_mask_free(&full_mask);
3572 out_free_cpu_mask:
3573     record__mmap_cpu_mask_free(&cpus_mask);
3574 
3575     return ret;
3576 }
3577 
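     /*
      * --threads=core: one data streaming thread per core, built from the
      * core CPU lists of the CPU topology.
      */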
3578 static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
3579 {
3580     int ret;
3581     struct cpu_topology *topo;
3582 
3583     topo = cpu_topology__new();
3584     if (!topo) {
3585         pr_err("Failed to allocate CPU topology\n");
3586         return -ENOMEM;
3587     }
3588 
3589     ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
3590                          topo->core_cpus_list, topo->core_cpus_lists);
3591     cpu_topology__delete(topo);
3592 
3593     return ret;
3594 }
3595 
3596 static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
3597 {
3598     int ret;
3599     struct cpu_topology *topo;
3600 
3601     topo = cpu_topology__new();
3602     if (!topo) {
3603         pr_err("Failed to allocate CPU topology\n");
3604         return -ENOMEM;
3605     }
3606 
3607     ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
3608                          topo->package_cpus_list, topo->package_cpus_lists);
3609     cpu_topology__delete(topo);
3610 
3611     return ret;
3612 }
3613 
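     /*
      * --threads=numa: one data streaming thread per NUMA node, using each
      * node's CPU list as both the maps and the affinity spec.
      */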
3614 static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
3615 {
3616     u32 s;
3617     int ret;
3618     const char **spec;
3619     struct numa_topology *topo;
3620 
3621     topo = numa_topology__new();
3622     if (!topo) {
3623         pr_err("Failed to allocate NUMA topology\n");
3624         return -ENOMEM;
3625     }
3626 
3627     spec = zalloc(topo->nr * sizeof(char *));
3628     if (!spec) {
3629         pr_err("Failed to allocate NUMA spec\n");
3630         ret = -ENOMEM;
3631         goto out_delete_topo;
3632     }
3633     for (s = 0; s < topo->nr; s++)
3634         spec[s] = topo->nodes[s].cpus;
3635 
3636     ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);
3637 
3638     zfree(&spec);
3639 
3640 out_delete_topo:
3641     numa_topology__delete(topo);
3642 
3643     return ret;
3644 }
3645 
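     /*
      * --threads=<user spec>: masks are given as <maps>/<affinity> pairs
      * separated by colons, where each element is a CPU list, e.g.
      * (illustrative) --threads=0,2-4/2-4:1,5-7/5-7.
      */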
3646 static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
3647 {
3648     int t, ret;
3649     u32 s, nr_spec = 0;
3650     char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
3651     char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;
3652 
3653     for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
3654         spec = strtok_r(user_spec, ":", &spec_ptr);
3655         if (spec == NULL)
3656             break;
3657         pr_debug2("threads_spec[%d]: %s\n", t, spec);
3658         mask = strtok_r(spec, "/", &mask_ptr);
3659         if (mask == NULL)
3660             break;
3661         pr_debug2("  maps mask: %s\n", mask);
3662         tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
3663         if (!tmp_spec) {
3664             pr_err("Failed to reallocate maps spec\n");
3665             ret = -ENOMEM;
3666             goto out_free;
3667         }
3668         maps_spec = tmp_spec;
3669         maps_spec[nr_spec] = dup_mask = strdup(mask);
3670         if (!maps_spec[nr_spec]) {
3671             pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
3672             ret = -ENOMEM;
3673             goto out_free;
3674         }
3675         mask = strtok_r(NULL, "/", &mask_ptr);
3676         if (mask == NULL) {
3677             pr_err("Invalid thread maps or affinity specs\n");
3678             ret = -EINVAL;
3679             goto out_free;
3680         }
3681         pr_debug2("  affinity mask: %s\n", mask);
3682         tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
3683         if (!tmp_spec) {
3684             pr_err("Failed to reallocate affinity spec\n");
3685             ret = -ENOMEM;
3686             goto out_free;
3687         }
3688         affinity_spec = tmp_spec;
3689         affinity_spec[nr_spec] = strdup(mask);
3690         if (!affinity_spec[nr_spec]) {
3691             pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
3692             ret = -ENOMEM;
3693             goto out_free;
3694         }
3695         dup_mask = NULL;
3696         nr_spec++;
3697     }
3698 
3699     ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
3700                          (const char **)affinity_spec, nr_spec);
3701 
3702 out_free:
3703     free(dup_mask);
3704     for (s = 0; s < nr_spec; s++) {
3705         if (maps_spec)
3706             free(maps_spec[s]);
3707         if (affinity_spec)
3708             free(affinity_spec[s]);
3709     }
3710     free(affinity_spec);
3711     free(maps_spec);
3712 
3713     return ret;
3714 }
3715 
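     /*
      * Default layout (no --threads): a single data streaming thread whose
      * maps mask covers all monitored CPUs.
      */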
3716 static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
3717 {
3718     int ret;
3719 
3720     ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
3721     if (ret)
3722         return ret;
3723 
3724     if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
3725         return -ENODEV;
3726 
3727     rec->nr_threads = 1;
3728 
3729     return 0;
3730 }
3731 
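     /*
      * Entry point for thread mask setup: use the single default mask unless
      * parallel streaming (--threads) was requested; otherwise dispatch on
      * the cpu/core/package/numa/user spec.
      */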
3732 static int record__init_thread_masks(struct record *rec)
3733 {
3734     int ret = 0;
3735     struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;
3736 
3737     if (!record__threads_enabled(rec))
3738         return record__init_thread_default_masks(rec, cpus);
3739 
3740     if (evlist__per_thread(rec->evlist)) {
3741         pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
3742         return -EINVAL;
3743     }
3744 
3745     switch (rec->opts.threads_spec) {
3746     case THREAD_SPEC__CPU:
3747         ret = record__init_thread_cpu_masks(rec, cpus);
3748         break;
3749     case THREAD_SPEC__CORE:
3750         ret = record__init_thread_core_masks(rec, cpus);
3751         break;
3752     case THREAD_SPEC__PACKAGE:
3753         ret = record__init_thread_package_masks(rec, cpus);
3754         break;
3755     case THREAD_SPEC__NUMA:
3756         ret = record__init_thread_numa_masks(rec, cpus);
3757         break;
3758     case THREAD_SPEC__USER:
3759         ret = record__init_thread_user_masks(rec, cpus);
3760         break;
3761     default:
3762         break;
3763     }
3764 
3765     return ret;
3766 }
3767 
3768 int cmd_record(int argc, const char **argv)
3769 {
3770     int err;
3771     struct record *rec = &record;
3772     char errbuf[BUFSIZ];
3773 
3774     setlocale(LC_ALL, "");
3775 
3776 #ifndef HAVE_LIBBPF_SUPPORT
3777 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
3778     set_nobuild('\0', "clang-path", true);
3779     set_nobuild('\0', "clang-opt", true);
3780 # undef set_nobuild
3781 #endif
3782 
3783 #ifndef HAVE_BPF_PROLOGUE
3784 # if !defined (HAVE_DWARF_SUPPORT)
3785 #  define REASON  "NO_DWARF=1"
3786 # elif !defined (HAVE_LIBBPF_SUPPORT)
3787 #  define REASON  "NO_LIBBPF=1"
3788 # else
3789 #  define REASON  "this architecture doesn't support BPF prologue"
3790 # endif
3791 # define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
3792     set_nobuild('\0', "vmlinux", true);
3793 # undef set_nobuild
3794 # undef REASON
3795 #endif
3796 
3797 #ifndef HAVE_BPF_SKEL
3798 # define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
3799     set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
3800 # undef set_nobuild
3801 #endif
3802 
3803     rec->opts.affinity = PERF_AFFINITY_SYS;
3804 
3805     rec->evlist = evlist__new();
3806     if (rec->evlist == NULL)
3807         return -ENOMEM;
3808 
3809     err = perf_config(perf_record_config, rec);
3810     if (err)
3811         return err;
3812 
3813     argc = parse_options(argc, argv, record_options, record_usage,
3814                 PARSE_OPT_STOP_AT_NON_OPTION);
3815     if (quiet)
3816         perf_quiet_option();
3817 
3818     err = symbol__validate_sym_arguments();
3819     if (err)
3820         return err;
3821 
3822     perf_debuginfod_setup(&record.debuginfod);
3823 
3824     /* Make system wide (-a) the default target. */
3825     if (!argc && target__none(&rec->opts.target))
3826         rec->opts.target.system_wide = true;
3827 
3828     if (nr_cgroups && !rec->opts.target.system_wide) {
3829         usage_with_options_msg(record_usage, record_options,
3830             "cgroup monitoring only available in system-wide mode");
3832     }
3833 
3834     if (rec->buildid_mmap) {
3835         if (!perf_can_record_build_id()) {
3836             pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
3837             err = -EINVAL;
3838             goto out_opts;
3839         }
3840         pr_debug("Enabling build id in mmap2 events.\n");
3841         /* Enable mmap build id synthesizing. */
3842         symbol_conf.buildid_mmap2 = true;
3843         /* Enable perf_event_attr::build_id bit. */
3844         rec->opts.build_id = true;
3845         /* Disable build id cache. */
3846         rec->no_buildid = true;
3847     }
3848 
3849     if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
3850         pr_err("Kernel has no cgroup sampling support.\n");
3851         err = -EINVAL;
3852         goto out_opts;
3853     }
3854 
3855     if (rec->opts.kcore)
3856         rec->opts.text_poke = true;
3857 
3858     if (rec->opts.kcore || record__threads_enabled(rec))
3859         rec->data.is_dir = true;
3860 
3861     if (record__threads_enabled(rec)) {
3862         if (rec->opts.affinity != PERF_AFFINITY_SYS) {
3863             pr_err("--affinity option is mutually exclusive with parallel streaming mode.\n");
                 err = -EINVAL;
3864             goto out_opts;
3865         }
3866         if (record__aio_enabled(rec)) {
3867             pr_err("Asynchronous streaming mode (--aio) is mutually exclusive with parallel streaming mode.\n");
                 err = -EINVAL;
3868             goto out_opts;
3869         }
3870     }
3871 
3872     if (rec->opts.comp_level != 0) {
3873         pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
3874         rec->no_buildid = true;
3875     }
3876 
3877     if (rec->opts.record_switch_events &&
3878         !perf_can_record_switch_events()) {
3879         ui__error("kernel does not support recording context switch events\n");
3880         parse_options_usage(record_usage, record_options, "switch-events", 0);
3881         err = -EINVAL;
3882         goto out_opts;
3883     }
3884 
3885     if (switch_output_setup(rec)) {
3886         parse_options_usage(record_usage, record_options, "switch-output", 0);
3887         err = -EINVAL;
3888         goto out_opts;
3889     }
3890 
3891     if (rec->switch_output.time) {
3892         signal(SIGALRM, alarm_sig_handler);
3893         alarm(rec->switch_output.time);
3894     }
3895 
3896     if (rec->switch_output.num_files) {
3897         rec->switch_output.filenames = calloc(rec->switch_output.num_files,
3898                               sizeof(char *));
3899         if (!rec->switch_output.filenames) {
3900             err = -EINVAL;
3901             goto out_opts;
3902         }
3903     }
3904 
3905     if (rec->timestamp_filename && record__threads_enabled(rec)) {
3906         rec->timestamp_filename = false;
3907         pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
3908     }
3909 
3910     /*
3911      * Allow aliases to facilitate the lookup of symbols for address
3912      * filters. Refer to auxtrace_parse_filters().
3913      */
3914     symbol_conf.allow_aliases = true;
3915 
3916     symbol__init(NULL);
3917 
3918     err = record__auxtrace_init(rec);
3919     if (err)
3920         goto out;
3921 
3922     if (dry_run)
3923         goto out;
3924 
3925     err = bpf__setup_stdout(rec->evlist);
3926     if (err) {
3927         bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
3928         pr_err("ERROR: Setup BPF stdout failed: %s\n",
3929              errbuf);
3930         goto out;
3931     }
3932 
3933     err = -ENOMEM;
3934 
3935     if (rec->no_buildid_cache || rec->no_buildid) {
3936         disable_buildid_cache();
3937     } else if (rec->switch_output.enabled) {
3938         /*
3939          * In 'perf record --switch-output', disable buildid
3940          * generation by default to reduce data file switching
3941          * overhead. Still generate buildids if they are required
3942          * explicitly using
3943          *
3944          *  perf record --switch-output --no-no-buildid \
3945          *              --no-no-buildid-cache
3946          *
3947          * The following code is equivalent to:
3948          *
3949          * if ((rec->no_buildid || !rec->no_buildid_set) &&
3950          *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
3951          *         disable_buildid_cache();
3952          */
3953         bool disable = true;
3954 
3955         if (rec->no_buildid_set && !rec->no_buildid)
3956             disable = false;
3957         if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
3958             disable = false;
3959         if (disable) {
3960             rec->no_buildid = true;
3961             rec->no_buildid_cache = true;
3962             disable_buildid_cache();
3963         }
3964     }
3965 
3966     if (record.opts.overwrite)
3967         record.opts.tail_synthesize = true;
3968 
3969     if (rec->evlist->core.nr_entries == 0) {
3970         if (perf_pmu__has_hybrid()) {
3971             err = evlist__add_default_hybrid(rec->evlist,
3972                              !record.opts.no_samples);
3973         } else {
3974             err = __evlist__add_default(rec->evlist,
3975                             !record.opts.no_samples);
3976         }
3977 
3978         if (err < 0) {
3979             pr_err("Not enough memory for event selector list\n");
3980             goto out;
3981         }
3982     }
3983 
3984     if (rec->opts.target.tid && !rec->opts.no_inherit_set)
3985         rec->opts.no_inherit = true;
3986 
3987     err = target__validate(&rec->opts.target);
3988     if (err) {
3989         target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3990         ui__warning("%s\n", errbuf);
3991     }
3992 
3993     err = target__parse_uid(&rec->opts.target);
3994     if (err) {
3995         int saved_errno = errno;
3996 
3997         target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3998         ui__error("%s", errbuf);
3999 
4000         err = -saved_errno;
4001         goto out;
4002     }
4003 
4004     /* Enable ignoring missing threads when -u/-p option is defined. */
4005     rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
4006 
4007     if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) {
4008         pr_err("failed to use cpu list %s\n",
4009                rec->opts.target.cpu_list);
4010         goto out;
4011     }
4012 
4013     rec->opts.target.hybrid = perf_pmu__has_hybrid();
4014 
4015     if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
4016         arch__add_leaf_frame_record_opts(&rec->opts);
4017 
4018     err = -ENOMEM;
4019     if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
4020         if (rec->opts.target.pid != NULL) {
4021             pr_err("Couldn't create thread/CPU maps: %s\n",
4022                 errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
4023             goto out;
4024         } else
4026             usage_with_options(record_usage, record_options);
4027     }
4028 
4029     err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
4030     if (err)
4031         goto out;
4032 
4033     /*
4034      * We take all buildids when the file contains AUX area
4035      * tracing data, because decoding the trace to find out
4036      * which DSOs were hit would take too long.
4037      */
4038     if (rec->opts.full_auxtrace)
4039         rec->buildid_all = true;
4040 
4041     if (rec->opts.text_poke) {
4042         err = record__config_text_poke(rec->evlist);
4043         if (err) {
4044             pr_err("record__config_text_poke failed, error %d\n", err);
4045             goto out;
4046         }
4047     }
4048 
4049     if (rec->off_cpu) {
4050         err = record__config_off_cpu(rec);
4051         if (err) {
4052             pr_err("record__config_off_cpu failed, error %d\n", err);
4053             goto out;
4054         }
4055     }
4056 
4057     if (record_opts__config(&rec->opts)) {
4058         err = -EINVAL;
4059         goto out;
4060     }
4061 
4062     err = record__init_thread_masks(rec);
4063     if (err) {
4064         pr_err("Failed to initialize parallel data streaming masks\n");
4065         goto out;
4066     }
4067 
4068     if (rec->opts.nr_cblocks > nr_cblocks_max)
4069         rec->opts.nr_cblocks = nr_cblocks_max;
4070     pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
4071 
4072     pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
4073     pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
4074 
4075     if (rec->opts.comp_level > comp_level_max)
4076         rec->opts.comp_level = comp_level_max;
4077     pr_debug("comp level: %d\n", rec->opts.comp_level);
4078 
4079     err = __cmd_record(&record, argc, argv);
4080 out:
4081     evlist__delete(rec->evlist);
4082     symbol__exit();
4083     auxtrace_record__free(rec->itr);
4084 out_opts:
4085     record__free_thread_masks(rec, rec->nr_threads);
4086     rec->nr_threads = 0;
4087     evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
4088     return err;
4089 }
4090 
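     /*
      * SIGUSR2 handler: take an AUX area tracing snapshot and, when
      * --switch-output=signal is in effect, rotate the output file.
      */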
4091 static void snapshot_sig_handler(int sig __maybe_unused)
4092 {
4093     struct record *rec = &record;
4094 
4095     hit_auxtrace_snapshot_trigger(rec);
4096 
4097     if (switch_output_signal(rec))
4098         trigger_hit(&switch_output_trigger);
4099 }
4100 
4101 static void alarm_sig_handler(int sig __maybe_unused)
4102 {
4103     struct record *rec = &record;
4104 
4105     if (switch_output_time(rec))
4106         trigger_hit(&switch_output_trigger);
4107 }