Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * builtin-inject.c
0004  *
0005  * Builtin inject command: Examine the live mode (stdin) event stream
0006  * and repipe it to stdout while optionally injecting additional
0007  * events into it.
0008  */
0009 #include "builtin.h"
0010 
0011 #include "util/color.h"
0012 #include "util/dso.h"
0013 #include "util/vdso.h"
0014 #include "util/evlist.h"
0015 #include "util/evsel.h"
0016 #include "util/map.h"
0017 #include "util/session.h"
0018 #include "util/tool.h"
0019 #include "util/debug.h"
0020 #include "util/build-id.h"
0021 #include "util/data.h"
0022 #include "util/auxtrace.h"
0023 #include "util/jit.h"
0024 #include "util/symbol.h"
0025 #include "util/synthetic-events.h"
0026 #include "util/thread.h"
0027 #include "util/namespaces.h"
0028 #include "util/util.h"
0029 #include "util/tsc.h"
0030 
0031 #include <internal/lib.h>
0032 
0033 #include <linux/err.h>
0034 #include <subcmd/parse-options.h>
0035 #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
0036 
0037 #include <linux/list.h>
0038 #include <linux/string.h>
0039 #include <linux/zalloc.h>
0040 #include <linux/hash.h>
0041 #include <errno.h>
0042 #include <signal.h>
0043 #include <inttypes.h>
0044 
/*
 * A guest event stashed while merging a guest perf.data file into the
 * host stream: the parsed sample, a pointer to the event, and the
 * buffer that backs it.
 */
struct guest_event {
    struct perf_sample      sample;
    union perf_event        *event;
    char                event_buf[PERF_SAMPLE_MAX_SIZE];
};
0050 
/*
 * Hash table entry mapping a guest sample id to the host id allocated
 * for it, plus the VCPU the id belongs to.
 */
struct guest_id {
    /* hlist_node must be first, see free_hlist() */
    struct hlist_node       node;
    u64             id;
    u64             host_id;
    u32             vcpu;
};
0058 
/* Hash table entry mapping a QEMU thread id to its VCPU number. */
struct guest_tid {
    /* hlist_node must be first, see free_hlist() */
    struct hlist_node       node;
    /* Thread ID of QEMU thread */
    u32             tid;
    u32             vcpu;
};
0066 
/* Per-VCPU state: the host CPU it is currently on and its QEMU thread. */
struct guest_vcpu {
    /* Current host CPU */
    u32             cpu;
    /* Thread ID of QEMU thread */
    u32             tid;
};
0073 
/*
 * State for injecting a guest perf.data file into the host session:
 * the guest session itself, id/tid mapping tables, TSC conversion
 * parameters for time correlation, and a staging area (tmp file and
 * stashed next event) used while interleaving guest and host events.
 */
struct guest_session {
    char                *perf_data_file;
    u32             machine_pid;
    u64             time_offset;
    double              time_scale;
    struct perf_tool        tool;
    struct perf_data        data;
    struct perf_session     *session;
    char                *tmp_file_name;
    int             tmp_fd;
    struct perf_tsc_conversion  host_tc;
    struct perf_tsc_conversion  guest_tc;
    bool                copy_kcore_dir;
    bool                have_tc;
    bool                fetched;
    bool                ready;
    u16             dflt_id_hdr_size;
    u64             dflt_id;
    u64             highest_id;
    /* Array of guest_vcpu */
    struct guest_vcpu       *vcpu;
    size_t              vcpu_cnt;
    /* Hash table for guest_id */
    struct hlist_head       heads[PERF_EVLIST__HLIST_SIZE];
    /* Hash table for guest_tid */
    struct hlist_head       tids[PERF_EVLIST__HLIST_SIZE];
    /* Place to stash next guest event */
    struct guest_event      ev;
};
0103 
/*
 * Top-level state for the inject command: option flags, the input
 * session, the output data file, and bookkeeping for the various
 * injection modes (build-ids, jitdump, sched_stat, guest merging).
 */
struct perf_inject {
    struct perf_tool    tool;
    struct perf_session *session;
    bool            build_ids;
    bool            build_id_all;
    bool            sched_stat;
    bool            have_auxtrace;
    bool            strip;
    bool            jit_mode;
    bool            in_place_update;
    bool            in_place_update_dry_run;
    bool            is_pipe;
    bool            copy_kcore_dir;
    const char      *input_name;
    struct perf_data    output;
    u64         bytes_written;
    u64         aux_id;
    /* Stashed sched_switch events, see perf_inject__sched_switch() */
    struct list_head    samples;
    struct itrace_synth_opts itrace_synth_opts;
    char            event_copy[PERF_SAMPLE_MAX_SIZE];
    struct perf_file_section secs[HEADER_FEAT_BITS];
    struct guest_session    guest_session;
};
0127 
/*
 * List entry holding a copy of an event, keyed by thread id; the event
 * is stored in the trailing flexible array member.
 */
struct event_entry {
    struct list_head node;
    u32      tid;
    union perf_event event[];
};
0133 
0134 static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
0135                 struct machine *machine, u8 cpumode, u32 flags);
0136 
0137 static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
0138 {
0139     ssize_t size;
0140 
0141     size = perf_data__write(&inject->output, buf, sz);
0142     if (size < 0)
0143         return -errno;
0144 
0145     inject->bytes_written += size;
0146     return 0;
0147 }
0148 
0149 static int perf_event__repipe_synth(struct perf_tool *tool,
0150                     union perf_event *event)
0151 {
0152     struct perf_inject *inject = container_of(tool, struct perf_inject,
0153                           tool);
0154 
0155     return output_bytes(inject, event, event->header.size);
0156 }
0157 
0158 static int perf_event__repipe_oe_synth(struct perf_tool *tool,
0159                        union perf_event *event,
0160                        struct ordered_events *oe __maybe_unused)
0161 {
0162     return perf_event__repipe_synth(tool, event);
0163 }
0164 
0165 #ifdef HAVE_JITDUMP
0166 static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused,
0167                    union perf_event *event __maybe_unused,
0168                    struct ordered_events *oe __maybe_unused)
0169 {
0170     return 0;
0171 }
0172 #endif
0173 
0174 static int perf_event__repipe_op2_synth(struct perf_session *session,
0175                     union perf_event *event)
0176 {
0177     return perf_event__repipe_synth(session->tool, event);
0178 }
0179 
0180 static int perf_event__repipe_op4_synth(struct perf_session *session,
0181                     union perf_event *event,
0182                     u64 data __maybe_unused,
0183                     const char *str __maybe_unused)
0184 {
0185     return perf_event__repipe_synth(session->tool, event);
0186 }
0187 
0188 static int perf_event__repipe_attr(struct perf_tool *tool,
0189                    union perf_event *event,
0190                    struct evlist **pevlist)
0191 {
0192     struct perf_inject *inject = container_of(tool, struct perf_inject,
0193                           tool);
0194     int ret;
0195 
0196     ret = perf_event__process_attr(tool, event, pevlist);
0197     if (ret)
0198         return ret;
0199 
0200     if (!inject->is_pipe)
0201         return 0;
0202 
0203     return perf_event__repipe_synth(tool, event);
0204 }
0205 
0206 static int perf_event__repipe_event_update(struct perf_tool *tool,
0207                        union perf_event *event,
0208                        struct evlist **pevlist __maybe_unused)
0209 {
0210     return perf_event__repipe_synth(tool, event);
0211 }
0212 
0213 #ifdef HAVE_AUXTRACE_SUPPORT
0214 
0215 static int copy_bytes(struct perf_inject *inject, int fd, off_t size)
0216 {
0217     char buf[4096];
0218     ssize_t ssz;
0219     int ret;
0220 
0221     while (size > 0) {
0222         ssz = read(fd, buf, min(size, (off_t)sizeof(buf)));
0223         if (ssz < 0)
0224             return -errno;
0225         ret = output_bytes(inject, buf, ssz);
0226         if (ret)
0227             return ret;
0228         size -= ssz;
0229     }
0230 
0231     return 0;
0232 }
0233 
/*
 * Repipe a PERF_RECORD_AUXTRACE event together with its trailing AUX
 * area data. Returns the number of AUX data bytes (so the caller can
 * advance past them in the input) or a negative error code.
 */
static s64 perf_event__repipe_auxtrace(struct perf_session *session,
                       union perf_event *event)
{
    struct perf_tool *tool = session->tool;
    struct perf_inject *inject = container_of(tool, struct perf_inject,
                          tool);
    int ret;

    inject->have_auxtrace = true;

    if (!inject->output.is_pipe) {
        off_t offset;

        /* Record where this event lands in the output file so the
         * auxtrace index can reference it. */
        offset = lseek(inject->output.file.fd, 0, SEEK_CUR);
        if (offset == -1)
            return -errno;
        ret = auxtrace_index__auxtrace_event(&session->auxtrace_index,
                             event, offset);
        if (ret < 0)
            return ret;
    }

    if (perf_data__is_pipe(session->data) || !session->one_mmap) {
        /* AUX data is not contiguously mapped after the event:
         * write the header, then copy the data from the input fd. */
        ret = output_bytes(inject, event, event->header.size);
        if (ret < 0)
            return ret;
        ret = copy_bytes(inject, perf_data__fd(session->data),
                 event->auxtrace.size);
    } else {
        /* Single mmap: header and AUX data are contiguous in memory,
         * write both in one go. */
        ret = output_bytes(inject, event,
                   event->header.size + event->auxtrace.size);
    }
    if (ret < 0)
        return ret;

    return event->auxtrace.size;
}
0271 
0272 #else
0273 
0274 static s64
0275 perf_event__repipe_auxtrace(struct perf_session *session __maybe_unused,
0276                 union perf_event *event __maybe_unused)
0277 {
0278     pr_err("AUX area tracing not supported\n");
0279     return -EINVAL;
0280 }
0281 
0282 #endif
0283 
0284 static int perf_event__repipe(struct perf_tool *tool,
0285                   union perf_event *event,
0286                   struct perf_sample *sample __maybe_unused,
0287                   struct machine *machine __maybe_unused)
0288 {
0289     return perf_event__repipe_synth(tool, event);
0290 }
0291 
0292 static int perf_event__drop(struct perf_tool *tool __maybe_unused,
0293                 union perf_event *event __maybe_unused,
0294                 struct perf_sample *sample __maybe_unused,
0295                 struct machine *machine __maybe_unused)
0296 {
0297     return 0;
0298 }
0299 
0300 static int perf_event__drop_aux(struct perf_tool *tool,
0301                 union perf_event *event __maybe_unused,
0302                 struct perf_sample *sample,
0303                 struct machine *machine __maybe_unused)
0304 {
0305     struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
0306 
0307     if (!inject->aux_id)
0308         inject->aux_id = sample->id;
0309 
0310     return 0;
0311 }
0312 
/*
 * Return a copy of @event (in inject->event_copy) with the embedded
 * AUX area sample data removed and its size field zeroed. If the
 * computed sizes look inconsistent, return the original event as-is.
 */
static union perf_event *
perf_inject__cut_auxtrace_sample(struct perf_inject *inject,
                 union perf_event *event,
                 struct perf_sample *sample)
{
    /* sz1: bytes before the aux data; sz2: bytes after it. */
    size_t sz1 = sample->aux_sample.data - (void *)event;
    size_t sz2 = event->header.size - sample->aux_sample.size - sz1;
    union perf_event *ev = (union perf_event *)inject->event_copy;

    /* Sanity checks; the unsigned comparisons also catch underflow. */
    if (sz1 > event->header.size || sz2 > event->header.size ||
        sz1 + sz2 > event->header.size ||
        sz1 < sizeof(struct perf_event_header) + sizeof(u64))
        return event;

    memcpy(ev, event, sz1);
    memcpy((void *)ev + sz1, (void *)event + event->header.size - sz2, sz2);
    ev->header.size = sz1 + sz2;
    /* Zero the u64 aux_sample.size field that precedes the cut data. */
    ((u64 *)((void *)ev + sz1))[-1] = 0;

    return ev;
}
0334 
0335 typedef int (*inject_handler)(struct perf_tool *tool,
0336                   union perf_event *event,
0337                   struct perf_sample *sample,
0338                   struct evsel *evsel,
0339                   struct machine *machine);
0340 
0341 static int perf_event__repipe_sample(struct perf_tool *tool,
0342                      union perf_event *event,
0343                      struct perf_sample *sample,
0344                      struct evsel *evsel,
0345                      struct machine *machine)
0346 {
0347     struct perf_inject *inject = container_of(tool, struct perf_inject,
0348                           tool);
0349 
0350     if (evsel && evsel->handler) {
0351         inject_handler f = evsel->handler;
0352         return f(tool, event, sample, evsel, machine);
0353     }
0354 
0355     build_id__mark_dso_hit(tool, event, sample, evsel, machine);
0356 
0357     if (inject->itrace_synth_opts.set && sample->aux_sample.size)
0358         event = perf_inject__cut_auxtrace_sample(inject, event, sample);
0359 
0360     return perf_event__repipe_synth(tool, event);
0361 }
0362 
/* Process an mmap event, then repipe it; report the processing result. */
static int perf_event__repipe_mmap(struct perf_tool *tool,
                   union perf_event *event,
                   struct perf_sample *sample,
                   struct machine *machine)
{
    int ret = perf_event__process_mmap(tool, event, sample, machine);

    perf_event__repipe(tool, event, sample, machine);
    return ret;
}
0375 
0376 #ifdef HAVE_JITDUMP
0377 static int perf_event__jit_repipe_mmap(struct perf_tool *tool,
0378                        union perf_event *event,
0379                        struct perf_sample *sample,
0380                        struct machine *machine)
0381 {
0382     struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
0383     u64 n = 0;
0384     int ret;
0385 
0386     /*
0387      * if jit marker, then inject jit mmaps and generate ELF images
0388      */
0389     ret = jit_process(inject->session, &inject->output, machine,
0390               event->mmap.filename, event->mmap.pid, event->mmap.tid, &n);
0391     if (ret < 0)
0392         return ret;
0393     if (ret) {
0394         inject->bytes_written += n;
0395         return 0;
0396     }
0397     return perf_event__repipe_mmap(tool, event, sample, machine);
0398 }
0399 #endif
0400 
/*
 * Find or create the dso for @filename as seen by task @pid/@tid.
 * On success the dso's nsinfo is replaced by (a reference to) the
 * thread's namespace info, so later build-id reads resolve paths in
 * the right mount namespace. Returns NULL on failure; the caller must
 * dso__put() a non-NULL result.
 */
static struct dso *findnew_dso(int pid, int tid, const char *filename,
                   struct dso_id *id, struct machine *machine)
{
    struct thread *thread;
    struct nsinfo *nsi = NULL;
    struct nsinfo *nnsi;
    struct dso *dso;
    bool vdso;

    thread = machine__findnew_thread(machine, pid, tid);
    if (thread == NULL) {
        pr_err("cannot find or create a task %d/%d.\n", tid, pid);
        return NULL;
    }

    vdso = is_vdso_map(filename);
    nsi = nsinfo__get(thread->nsinfo);

    if (vdso) {
        /* The vdso maps are always on the host and not the
         * container.  Ensure that we don't use setns to look
         * them up.
         */
        nnsi = nsinfo__copy(nsi);
        if (nnsi) {
            nsinfo__put(nsi);
            nsinfo__clear_need_setns(nnsi);
            nsi = nnsi;
        }
        dso = machine__findnew_vdso(machine, thread);
    } else {
        dso = machine__findnew_dso_id(machine, filename, id);
    }

    /* Transfer the nsi reference to the dso, or drop it on failure. */
    if (dso) {
        nsinfo__put(dso->nsinfo);
        dso->nsinfo = nsi;
    } else
        nsinfo__put(nsi);

    thread__put(thread);
    return dso;
}
0444 
0445 static int perf_event__repipe_buildid_mmap(struct perf_tool *tool,
0446                        union perf_event *event,
0447                        struct perf_sample *sample,
0448                        struct machine *machine)
0449 {
0450     struct dso *dso;
0451 
0452     dso = findnew_dso(event->mmap.pid, event->mmap.tid,
0453               event->mmap.filename, NULL, machine);
0454 
0455     if (dso && !dso->hit) {
0456         dso->hit = 1;
0457         dso__inject_build_id(dso, tool, machine, sample->cpumode, 0);
0458     }
0459     dso__put(dso);
0460 
0461     return perf_event__repipe(tool, event, sample, machine);
0462 }
0463 
0464 static int perf_event__repipe_mmap2(struct perf_tool *tool,
0465                    union perf_event *event,
0466                    struct perf_sample *sample,
0467                    struct machine *machine)
0468 {
0469     int err;
0470 
0471     err = perf_event__process_mmap2(tool, event, sample, machine);
0472     perf_event__repipe(tool, event, sample, machine);
0473 
0474     if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
0475         struct dso *dso;
0476 
0477         dso = findnew_dso(event->mmap2.pid, event->mmap2.tid,
0478                   event->mmap2.filename, NULL, machine);
0479         if (dso) {
0480             /* mark it not to inject build-id */
0481             dso->hit = 1;
0482         }
0483         dso__put(dso);
0484     }
0485 
0486     return err;
0487 }
0488 
0489 #ifdef HAVE_JITDUMP
0490 static int perf_event__jit_repipe_mmap2(struct perf_tool *tool,
0491                     union perf_event *event,
0492                     struct perf_sample *sample,
0493                     struct machine *machine)
0494 {
0495     struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
0496     u64 n = 0;
0497     int ret;
0498 
0499     /*
0500      * if jit marker, then inject jit mmaps and generate ELF images
0501      */
0502     ret = jit_process(inject->session, &inject->output, machine,
0503               event->mmap2.filename, event->mmap2.pid, event->mmap2.tid, &n);
0504     if (ret < 0)
0505         return ret;
0506     if (ret) {
0507         inject->bytes_written += n;
0508         return 0;
0509     }
0510     return perf_event__repipe_mmap2(tool, event, sample, machine);
0511 }
0512 #endif
0513 
0514 static int perf_event__repipe_buildid_mmap2(struct perf_tool *tool,
0515                         union perf_event *event,
0516                         struct perf_sample *sample,
0517                         struct machine *machine)
0518 {
0519     struct dso_id dso_id = {
0520         .maj = event->mmap2.maj,
0521         .min = event->mmap2.min,
0522         .ino = event->mmap2.ino,
0523         .ino_generation = event->mmap2.ino_generation,
0524     };
0525     struct dso *dso;
0526 
0527     if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
0528         /* cannot use dso_id since it'd have invalid info */
0529         dso = findnew_dso(event->mmap2.pid, event->mmap2.tid,
0530                   event->mmap2.filename, NULL, machine);
0531         if (dso) {
0532             /* mark it not to inject build-id */
0533             dso->hit = 1;
0534         }
0535         dso__put(dso);
0536         return 0;
0537     }
0538 
0539     dso = findnew_dso(event->mmap2.pid, event->mmap2.tid,
0540               event->mmap2.filename, &dso_id, machine);
0541 
0542     if (dso && !dso->hit) {
0543         dso->hit = 1;
0544         dso__inject_build_id(dso, tool, machine, sample->cpumode,
0545                      event->mmap2.flags);
0546     }
0547     dso__put(dso);
0548 
0549     perf_event__repipe(tool, event, sample, machine);
0550 
0551     return 0;
0552 }
0553 
/* Process a fork event, then repipe it; report the processing result. */
static int perf_event__repipe_fork(struct perf_tool *tool,
                   union perf_event *event,
                   struct perf_sample *sample,
                   struct machine *machine)
{
    int ret = perf_event__process_fork(tool, event, sample, machine);

    perf_event__repipe(tool, event, sample, machine);
    return ret;
}
0566 
/* Process a comm event, then repipe it; report the processing result. */
static int perf_event__repipe_comm(struct perf_tool *tool,
                   union perf_event *event,
                   struct perf_sample *sample,
                   struct machine *machine)
{
    int ret = perf_event__process_comm(tool, event, sample, machine);

    perf_event__repipe(tool, event, sample, machine);
    return ret;
}
0579 
/* Process a namespaces event, then repipe it; report the result. */
static int perf_event__repipe_namespaces(struct perf_tool *tool,
                     union perf_event *event,
                     struct perf_sample *sample,
                     struct machine *machine)
{
    int ret = perf_event__process_namespaces(tool, event, sample, machine);

    perf_event__repipe(tool, event, sample, machine);
    return ret;
}
0591 
/* Process an exit event, then repipe it; report the processing result. */
static int perf_event__repipe_exit(struct perf_tool *tool,
                   union perf_event *event,
                   struct perf_sample *sample,
                   struct machine *machine)
{
    int ret = perf_event__process_exit(tool, event, sample, machine);

    perf_event__repipe(tool, event, sample, machine);
    return ret;
}
0604 
0605 static int perf_event__repipe_tracing_data(struct perf_session *session,
0606                        union perf_event *event)
0607 {
0608     perf_event__repipe_synth(session->tool, event);
0609 
0610     return perf_event__process_tracing_data(session, event);
0611 }
0612 
0613 static int dso__read_build_id(struct dso *dso)
0614 {
0615     struct nscookie nsc;
0616 
0617     if (dso->has_build_id)
0618         return 0;
0619 
0620     nsinfo__mountns_enter(dso->nsinfo, &nsc);
0621     if (filename__read_build_id(dso->long_name, &dso->bid) > 0)
0622         dso->has_build_id = true;
0623     else if (dso->nsinfo) {
0624         char *new_name;
0625 
0626         new_name = filename_with_chroot(dso->nsinfo->pid,
0627                         dso->long_name);
0628         if (new_name && filename__read_build_id(new_name, &dso->bid) > 0)
0629             dso->has_build_id = true;
0630         free(new_name);
0631     }
0632     nsinfo__mountns_exit(&nsc);
0633 
0634     return dso->has_build_id ? 0 : -1;
0635 }
0636 
0637 static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
0638                 struct machine *machine, u8 cpumode, u32 flags)
0639 {
0640     int err;
0641 
0642     if (is_anon_memory(dso->long_name) || flags & MAP_HUGETLB)
0643         return 0;
0644     if (is_no_dso_memory(dso->long_name))
0645         return 0;
0646 
0647     if (dso__read_build_id(dso) < 0) {
0648         pr_debug("no build_id found for %s\n", dso->long_name);
0649         return -1;
0650     }
0651 
0652     err = perf_event__synthesize_build_id(tool, dso, cpumode,
0653                           perf_event__repipe, machine);
0654     if (err) {
0655         pr_err("Can't synthesize build_id event for %s\n", dso->long_name);
0656         return -1;
0657     }
0658 
0659     return 0;
0660 }
0661 
/*
 * Sample handler for build-id injection: resolve the sample's thread
 * and map, inject the hit dso's build-id (once per dso), and always
 * repipe the sample — even when the thread or map cannot be resolved.
 */
int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event,
                   struct perf_sample *sample,
                   struct evsel *evsel __maybe_unused,
                   struct machine *machine)
{
    struct addr_location al;
    struct thread *thread;

    thread = machine__findnew_thread(machine, sample->pid, sample->tid);
    if (thread == NULL) {
        pr_err("problem processing %d event, skipping it.\n",
               event->header.type);
        goto repipe;
    }

    if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) {
        /* Inject each dso's build-id only once. */
        if (!al.map->dso->hit) {
            al.map->dso->hit = 1;
            dso__inject_build_id(al.map->dso, tool, machine,
                         sample->cpumode, al.map->flags);
        }
    }

    thread__put(thread);
repipe:
    perf_event__repipe(tool, event, sample, machine);
    return 0;
}
0690 
0691 static int perf_inject__sched_process_exit(struct perf_tool *tool,
0692                        union perf_event *event __maybe_unused,
0693                        struct perf_sample *sample,
0694                        struct evsel *evsel __maybe_unused,
0695                        struct machine *machine __maybe_unused)
0696 {
0697     struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
0698     struct event_entry *ent;
0699 
0700     list_for_each_entry(ent, &inject->samples, node) {
0701         if (sample->tid == ent->tid) {
0702             list_del_init(&ent->node);
0703             free(ent);
0704             break;
0705         }
0706     }
0707 
0708     return 0;
0709 }
0710 
0711 static int perf_inject__sched_switch(struct perf_tool *tool,
0712                      union perf_event *event,
0713                      struct perf_sample *sample,
0714                      struct evsel *evsel,
0715                      struct machine *machine)
0716 {
0717     struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
0718     struct event_entry *ent;
0719 
0720     perf_inject__sched_process_exit(tool, event, sample, evsel, machine);
0721 
0722     ent = malloc(event->header.size + sizeof(struct event_entry));
0723     if (ent == NULL) {
0724         color_fprintf(stderr, PERF_COLOR_RED,
0725                  "Not enough memory to process sched switch event!");
0726         return -1;
0727     }
0728 
0729     ent->tid = sample->tid;
0730     memcpy(&ent->event, event, event->header.size);
0731     list_add(&ent->node, &inject->samples);
0732     return 0;
0733 }
0734 
0735 static int perf_inject__sched_stat(struct perf_tool *tool,
0736                    union perf_event *event __maybe_unused,
0737                    struct perf_sample *sample,
0738                    struct evsel *evsel,
0739                    struct machine *machine)
0740 {
0741     struct event_entry *ent;
0742     union perf_event *event_sw;
0743     struct perf_sample sample_sw;
0744     struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
0745     u32 pid = evsel__intval(evsel, sample, "pid");
0746 
0747     list_for_each_entry(ent, &inject->samples, node) {
0748         if (pid == ent->tid)
0749             goto found;
0750     }
0751 
0752     return 0;
0753 found:
0754     event_sw = &ent->event[0];
0755     evsel__parse_sample(evsel, event_sw, &sample_sw);
0756 
0757     sample_sw.period = sample->period;
0758     sample_sw.time   = sample->time;
0759     perf_event__synthesize_sample(event_sw, evsel->core.attr.sample_type,
0760                       evsel->core.attr.read_format, &sample_sw);
0761     build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
0762     return perf_event__repipe(tool, event_sw, &sample_sw, machine);
0763 }
0764 
0765 static struct guest_vcpu *guest_session__vcpu(struct guest_session *gs, u32 vcpu)
0766 {
0767     if (realloc_array_as_needed(gs->vcpu, gs->vcpu_cnt, vcpu, NULL))
0768         return NULL;
0769     return &gs->vcpu[vcpu];
0770 }
0771 
0772 static int guest_session__output_bytes(struct guest_session *gs, void *buf, size_t sz)
0773 {
0774     ssize_t ret = writen(gs->tmp_fd, buf, sz);
0775 
0776     return ret < 0 ? ret : 0;
0777 }
0778 
0779 static int guest_session__repipe(struct perf_tool *tool,
0780                  union perf_event *event,
0781                  struct perf_sample *sample __maybe_unused,
0782                  struct machine *machine __maybe_unused)
0783 {
0784     struct guest_session *gs = container_of(tool, struct guest_session, tool);
0785 
0786     return guest_session__output_bytes(gs, event, event->header.size);
0787 }
0788 
0789 static int guest_session__map_tid(struct guest_session *gs, u32 tid, u32 vcpu)
0790 {
0791     struct guest_tid *guest_tid = zalloc(sizeof(*guest_tid));
0792     int hash;
0793 
0794     if (!guest_tid)
0795         return -ENOMEM;
0796 
0797     guest_tid->tid = tid;
0798     guest_tid->vcpu = vcpu;
0799     hash = hash_32(guest_tid->tid, PERF_EVLIST__HLIST_BITS);
0800     hlist_add_head(&guest_tid->node, &gs->tids[hash]);
0801 
0802     return 0;
0803 }
0804 
0805 static int host_peek_vm_comms_cb(struct perf_session *session __maybe_unused,
0806                  union perf_event *event,
0807                  u64 offset __maybe_unused, void *data)
0808 {
0809     struct guest_session *gs = data;
0810     unsigned int vcpu;
0811     struct guest_vcpu *guest_vcpu;
0812     int ret;
0813 
0814     if (event->header.type != PERF_RECORD_COMM ||
0815         event->comm.pid != gs->machine_pid)
0816         return 0;
0817 
0818     /*
0819      * QEMU option -name debug-threads=on, causes thread names formatted as
0820      * below, although it is not an ABI. Also libvirt seems to use this by
0821      * default. Here we rely on it to tell us which thread is which VCPU.
0822      */
0823     ret = sscanf(event->comm.comm, "CPU %u/KVM", &vcpu);
0824     if (ret <= 0)
0825         return ret;
0826     pr_debug("Found VCPU: tid %u comm %s vcpu %u\n",
0827          event->comm.tid, event->comm.comm, vcpu);
0828     if (vcpu > INT_MAX) {
0829         pr_err("Invalid VCPU %u\n", vcpu);
0830         return -EINVAL;
0831     }
0832     guest_vcpu = guest_session__vcpu(gs, vcpu);
0833     if (!guest_vcpu)
0834         return -ENOMEM;
0835     if (guest_vcpu->tid && guest_vcpu->tid != event->comm.tid) {
0836         pr_err("Fatal error: Two threads found with the same VCPU\n");
0837         return -EINVAL;
0838     }
0839     guest_vcpu->tid = event->comm.tid;
0840 
0841     return guest_session__map_tid(gs, event->comm.tid, vcpu);
0842 }
0843 
0844 static int host_peek_vm_comms(struct perf_session *session, struct guest_session *gs)
0845 {
0846     return perf_session__peek_events(session, session->header.data_offset,
0847                      session->header.data_size,
0848                      host_peek_vm_comms_cb, gs);
0849 }
0850 
0851 static bool evlist__is_id_used(struct evlist *evlist, u64 id)
0852 {
0853     return evlist__id2sid(evlist, id);
0854 }
0855 
0856 static u64 guest_session__allocate_new_id(struct guest_session *gs, struct evlist *host_evlist)
0857 {
0858     do {
0859         gs->highest_id += 1;
0860     } while (!gs->highest_id || evlist__is_id_used(host_evlist, gs->highest_id));
0861 
0862     return gs->highest_id;
0863 }
0864 
0865 static int guest_session__map_id(struct guest_session *gs, u64 id, u64 host_id, u32 vcpu)
0866 {
0867     struct guest_id *guest_id = zalloc(sizeof(*guest_id));
0868     int hash;
0869 
0870     if (!guest_id)
0871         return -ENOMEM;
0872 
0873     guest_id->id = id;
0874     guest_id->host_id = host_id;
0875     guest_id->vcpu = vcpu;
0876     hash = hash_64(guest_id->id, PERF_EVLIST__HLIST_BITS);
0877     hlist_add_head(&guest_id->node, &gs->heads[hash]);
0878 
0879     return 0;
0880 }
0881 
0882 static u64 evlist__find_highest_id(struct evlist *evlist)
0883 {
0884     struct evsel *evsel;
0885     u64 highest_id = 1;
0886 
0887     evlist__for_each_entry(evlist, evsel) {
0888         u32 j;
0889 
0890         for (j = 0; j < evsel->core.ids; j++) {
0891             u64 id = evsel->core.id[j];
0892 
0893             if (id > highest_id)
0894                 highest_id = id;
0895         }
0896     }
0897 
0898     return highest_id;
0899 }
0900 
0901 static int guest_session__map_ids(struct guest_session *gs, struct evlist *host_evlist)
0902 {
0903     struct evlist *evlist = gs->session->evlist;
0904     struct evsel *evsel;
0905     int ret;
0906 
0907     evlist__for_each_entry(evlist, evsel) {
0908         u32 j;
0909 
0910         for (j = 0; j < evsel->core.ids; j++) {
0911             struct perf_sample_id *sid;
0912             u64 host_id;
0913             u64 id;
0914 
0915             id = evsel->core.id[j];
0916             sid = evlist__id2sid(evlist, id);
0917             if (!sid || sid->cpu.cpu == -1)
0918                 continue;
0919             host_id = guest_session__allocate_new_id(gs, host_evlist);
0920             ret = guest_session__map_id(gs, id, host_id, sid->cpu.cpu);
0921             if (ret)
0922                 return ret;
0923         }
0924     }
0925 
0926     return 0;
0927 }
0928 
0929 static struct guest_id *guest_session__lookup_id(struct guest_session *gs, u64 id)
0930 {
0931     struct hlist_head *head;
0932     struct guest_id *guest_id;
0933     int hash;
0934 
0935     hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
0936     head = &gs->heads[hash];
0937 
0938     hlist_for_each_entry(guest_id, head, node)
0939         if (guest_id->id == id)
0940             return guest_id;
0941 
0942     return NULL;
0943 }
0944 
0945 static int process_attr(struct perf_tool *tool, union perf_event *event,
0946             struct perf_sample *sample __maybe_unused,
0947             struct machine *machine __maybe_unused)
0948 {
0949     struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
0950 
0951     return perf_event__process_attr(tool, event, &inject->session->evlist);
0952 }
0953 
0954 static int guest_session__add_attr(struct guest_session *gs, struct evsel *evsel)
0955 {
0956     struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
0957     struct perf_event_attr attr = evsel->core.attr;
0958     u64 *id_array;
0959     u32 *vcpu_array;
0960     int ret = -ENOMEM;
0961     u32 i;
0962 
0963     id_array = calloc(evsel->core.ids, sizeof(*id_array));
0964     if (!id_array)
0965         return -ENOMEM;
0966 
0967     vcpu_array = calloc(evsel->core.ids, sizeof(*vcpu_array));
0968     if (!vcpu_array)
0969         goto out;
0970 
0971     for (i = 0; i < evsel->core.ids; i++) {
0972         u64 id = evsel->core.id[i];
0973         struct guest_id *guest_id = guest_session__lookup_id(gs, id);
0974 
0975         if (!guest_id) {
0976             pr_err("Failed to find guest id %"PRIu64"\n", id);
0977             ret = -EINVAL;
0978             goto out;
0979         }
0980         id_array[i] = guest_id->host_id;
0981         vcpu_array[i] = guest_id->vcpu;
0982     }
0983 
0984     attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
0985     attr.exclude_host = 1;
0986     attr.exclude_guest = 0;
0987 
0988     ret = perf_event__synthesize_attr(&inject->tool, &attr, evsel->core.ids,
0989                       id_array, process_attr);
0990     if (ret)
0991         pr_err("Failed to add guest attr.\n");
0992 
0993     for (i = 0; i < evsel->core.ids; i++) {
0994         struct perf_sample_id *sid;
0995         u32 vcpu = vcpu_array[i];
0996 
0997         sid = evlist__id2sid(inject->session->evlist, id_array[i]);
0998         /* Guest event is per-thread from the host point of view */
0999         sid->cpu.cpu = -1;
1000         sid->tid = gs->vcpu[vcpu].tid;
1001         sid->machine_pid = gs->machine_pid;
1002         sid->vcpu.cpu = vcpu;
1003     }
1004 out:
1005     free(vcpu_array);
1006     free(id_array);
1007     return ret;
1008 }
1009 
1010 static int guest_session__add_attrs(struct guest_session *gs)
1011 {
1012     struct evlist *evlist = gs->session->evlist;
1013     struct evsel *evsel;
1014     int ret;
1015 
1016     evlist__for_each_entry(evlist, evsel) {
1017         ret = guest_session__add_attr(gs, evsel);
1018         if (ret)
1019             return ret;
1020     }
1021 
1022     return 0;
1023 }
1024 
1025 static int synthesize_id_index(struct perf_inject *inject, size_t new_cnt)
1026 {
1027     struct perf_session *session = inject->session;
1028     struct evlist *evlist = session->evlist;
1029     struct machine *machine = &session->machines.host;
1030     size_t from = evlist->core.nr_entries - new_cnt;
1031 
1032     return __perf_event__synthesize_id_index(&inject->tool, perf_event__repipe,
1033                          evlist, machine, from);
1034 }
1035 
1036 static struct guest_tid *guest_session__lookup_tid(struct guest_session *gs, u32 tid)
1037 {
1038     struct hlist_head *head;
1039     struct guest_tid *guest_tid;
1040     int hash;
1041 
1042     hash = hash_32(tid, PERF_EVLIST__HLIST_BITS);
1043     head = &gs->tids[hash];
1044 
1045     hlist_for_each_entry(guest_tid, head, node)
1046         if (guest_tid->tid == tid)
1047             return guest_tid;
1048 
1049     return NULL;
1050 }
1051 
1052 static bool dso__is_in_kernel_space(struct dso *dso)
1053 {
1054     if (dso__is_vdso(dso))
1055         return false;
1056 
1057     return dso__is_kcore(dso) ||
1058            dso->kernel ||
1059            is_kernel_module(dso->long_name, PERF_RECORD_MISC_CPUMODE_UNKNOWN);
1060 }
1061 
1062 static u64 evlist__first_id(struct evlist *evlist)
1063 {
1064     struct evsel *evsel;
1065 
1066     evlist__for_each_entry(evlist, evsel) {
1067         if (evsel->core.ids)
1068             return evsel->core.id[0];
1069     }
1070     return 0;
1071 }
1072 
1073 static int process_build_id(struct perf_tool *tool,
1074                 union perf_event *event,
1075                 struct perf_sample *sample __maybe_unused,
1076                 struct machine *machine __maybe_unused)
1077 {
1078     struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1079 
1080     return perf_event__process_build_id(inject->session, event);
1081 }
1082 
1083 static int synthesize_build_id(struct perf_inject *inject, struct dso *dso, pid_t machine_pid)
1084 {
1085     struct machine *machine = perf_session__findnew_machine(inject->session, machine_pid);
1086     u8 cpumode = dso__is_in_kernel_space(dso) ?
1087             PERF_RECORD_MISC_GUEST_KERNEL :
1088             PERF_RECORD_MISC_GUEST_USER;
1089 
1090     if (!machine)
1091         return -ENOMEM;
1092 
1093     dso->hit = 1;
1094 
1095     return perf_event__synthesize_build_id(&inject->tool, dso, cpumode,
1096                            process_build_id, machine);
1097 }
1098 
/*
 * Feed the build IDs of all guest dsos into the host session so that they
 * end up in the host's Build ID feature section.
 */
static int guest_session__add_build_ids(struct guest_session *gs)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
	struct machine *machine = &gs->session->machines.host;
	struct dso *dso;
	int ret;

	/* Build IDs will be put in the Build ID feature section */
	perf_header__set_feat(&inject->session->header, HEADER_BUILD_ID);

	dsos__for_each_with_build_id(dso, &machine->dsos.head) {
		ret = synthesize_build_id(inject, dso, gs->machine_pid);
		if (ret)
			return ret;
	}

	return 0;
}
1117 
1118 static int guest_session__ksymbol_event(struct perf_tool *tool,
1119                     union perf_event *event,
1120                     struct perf_sample *sample __maybe_unused,
1121                     struct machine *machine __maybe_unused)
1122 {
1123     struct guest_session *gs = container_of(tool, struct guest_session, tool);
1124 
1125     /* Only support out-of-line i.e. no BPF support */
1126     if (event->ksymbol.ksym_type != PERF_RECORD_KSYMBOL_TYPE_OOL)
1127         return 0;
1128 
1129     return guest_session__output_bytes(gs, event, event->header.size);
1130 }
1131 
/*
 * Open the guest perf.data file and pre-process it: selected guest events
 * are repiped into a temporary file so that they can later be interleaved
 * with host events. Returns 0 on success or a negative error code; partial
 * state is cleaned up by guest_session__exit().
 */
static int guest_session__start(struct guest_session *gs, const char *name, bool force)
{
	char tmp_file_name[] = "/tmp/perf-inject-guest_session-XXXXXX";
	struct perf_session *session;
	int ret;

	/* Only these events will be injected */
	gs->tool.mmap		= guest_session__repipe;
	gs->tool.mmap2		= guest_session__repipe;
	gs->tool.comm		= guest_session__repipe;
	gs->tool.fork		= guest_session__repipe;
	gs->tool.exit		= guest_session__repipe;
	gs->tool.lost		= guest_session__repipe;
	gs->tool.context_switch	= guest_session__repipe;
	gs->tool.ksymbol	= guest_session__ksymbol_event;
	gs->tool.text_poke	= guest_session__repipe;
	/*
	 * Processing a build ID creates a struct dso with that build ID. Later,
	 * all guest dsos are iterated and the build IDs processed into the host
	 * session where they will be output to the Build ID feature section
	 * when the perf.data file header is written.
	 */
	gs->tool.build_id	= perf_event__process_build_id;
	/* Process the id index to know what VCPU an ID belongs to */
	gs->tool.id_index	= perf_event__process_id_index;

	gs->tool.ordered_events	= true;
	gs->tool.ordering_requires_timestamps = true;

	gs->data.path	= name;
	gs->data.force	= force;
	gs->data.mode	= PERF_DATA_MODE_READ;

	session = perf_session__new(&gs->data, &gs->tool);
	if (IS_ERR(session))
		return PTR_ERR(session);
	gs->session = session;

	/*
	 * Initial events have zero'd ID samples. Get default ID sample size
	 * used for removing them.
	 */
	gs->dflt_id_hdr_size = session->machines.host.id_hdr_size;
	/* And default ID for adding back a host-compatible ID sample */
	gs->dflt_id = evlist__first_id(session->evlist);
	if (!gs->dflt_id) {
		pr_err("Guest data has no sample IDs");
		return -EINVAL;
	}

	/* Temporary file for guest events */
	gs->tmp_file_name = strdup(tmp_file_name);
	if (!gs->tmp_file_name)
		return -ENOMEM;
	gs->tmp_fd = mkstemp(gs->tmp_file_name);
	if (gs->tmp_fd < 0)
		return -errno;

	/* Decompression failure is non-fatal: data may be uncompressed */
	if (zstd_init(&gs->session->zstd_data, 0) < 0)
		pr_warning("Guest session decompression initialization failed.\n");

	/*
	 * perf does not support processing 2 sessions simultaneously, so output
	 * guest events to a temporary file.
	 */
	ret = perf_session__process_events(gs->session);
	if (ret)
		return ret;

	/* Rewind so guest_session__fetch() starts at the first event */
	if (lseek(gs->tmp_fd, 0, SEEK_SET))
		return -errno;

	return 0;
}
1206 
1207 /* Free hlist nodes assuming hlist_node is the first member of hlist entries */
1208 static void free_hlist(struct hlist_head *heads, size_t hlist_sz)
1209 {
1210     struct hlist_node *pos, *n;
1211     size_t i;
1212 
1213     for (i = 0; i < hlist_sz; ++i) {
1214         hlist_for_each_safe(pos, n, &heads[i]) {
1215             hlist_del(pos);
1216             free(pos);
1217         }
1218     }
1219 }
1220 
/*
 * Tear down the guest session: delete the perf session, free the guest ID
 * and TID hash tables, close and unlink the temporary event file, and free
 * the per-VCPU array and the perf.data file-name buffer (which owns the
 * strdup'd --guest-data argument, see parse_guest_data()).
 */
static void guest_session__exit(struct guest_session *gs)
{
	if (gs->session) {
		perf_session__delete(gs->session);
		/* Hash entries have their hlist_node first, see free_hlist() */
		free_hlist(gs->heads, PERF_EVLIST__HLIST_SIZE);
		free_hlist(gs->tids, PERF_EVLIST__HLIST_SIZE);
	}
	if (gs->tmp_file_name) {
		if (gs->tmp_fd >= 0)
			close(gs->tmp_fd);
		unlink(gs->tmp_file_name);
		free(gs->tmp_file_name);
	}
	free(gs->vcpu);
	free(gs->perf_data_file);
}
1237 
/* Copy TSC conversion parameters out of a PERF_RECORD_TIME_CONV event. */
static void get_tsc_conv(struct perf_tsc_conversion *tc, struct perf_record_time_conv *time_conv)
{
	tc->time_shift		= time_conv->time_shift;
	tc->time_mult		= time_conv->time_mult;
	tc->time_zero		= time_conv->time_zero;
	tc->time_cycles		= time_conv->time_cycles;
	tc->time_mask		= time_conv->time_mask;
	tc->cap_user_time_zero	= time_conv->cap_user_time_zero;
	tc->cap_user_time_short	= time_conv->cap_user_time_short;
}
1248 
/* Capture TSC conversion parameters for both the host and guest sessions. */
static void guest_session__get_tc(struct guest_session *gs)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);

	get_tsc_conv(&gs->host_tc, &inject->session->time_conv);
	get_tsc_conv(&gs->guest_tc, &gs->session->time_conv);
}
1256 
1257 static void guest_session__convert_time(struct guest_session *gs, u64 guest_time, u64 *host_time)
1258 {
1259     u64 tsc;
1260 
1261     if (!guest_time) {
1262         *host_time = 0;
1263         return;
1264     }
1265 
1266     if (gs->guest_tc.cap_user_time_zero)
1267         tsc = perf_time_to_tsc(guest_time, &gs->guest_tc);
1268     else
1269         tsc = guest_time;
1270 
1271     /*
1272      * This is the correct order of operations for x86 if the TSC Offset and
1273      * Multiplier values are used.
1274      */
1275     tsc -= gs->time_offset;
1276     tsc /= gs->time_scale;
1277 
1278     if (gs->host_tc.cap_user_time_zero)
1279         *host_time = tsc_to_perf_time(tsc, &gs->host_tc);
1280     else
1281         *host_time = tsc;
1282 }
1283 
/*
 * Read the next event from the guest temporary file into gs->ev: read the
 * header, then the payload, parse the ID sample, and convert the sample
 * timestamp from guest to host time. EOF is signalled by leaving the
 * fetched event with a zero header size.
 *
 * NOTE(review): assumes events in the temporary file are well-formed, i.e.
 * hdr->size >= sizeof(*hdr) and the payload read is not short — confirm
 * against guest_session__output_bytes() which wrote the file.
 */
static int guest_session__fetch(struct guest_session *gs)
{
	void *buf = gs->ev.event_buf;
	struct perf_event_header *hdr = buf;
	size_t hdr_sz = sizeof(*hdr);
	ssize_t ret;

	ret = readn(gs->tmp_fd, buf, hdr_sz);
	if (ret < 0)
		return ret;

	if (!ret) {
		/* Zero size means EOF */
		hdr->size = 0;
		return 0;
	}

	buf += hdr_sz;

	/* Read the remainder of the event following the header */
	ret = readn(gs->tmp_fd, buf, hdr->size - hdr_sz);
	if (ret < 0)
		return ret;

	gs->ev.event = (union perf_event *)gs->ev.event_buf;
	gs->ev.sample.time = 0;

	/* Only kernel event types should have been written to the file */
	if (hdr->type >= PERF_RECORD_USER_TYPE_START) {
		pr_err("Unexpected type fetching guest event");
		return 0;
	}

	ret = evlist__parse_sample(gs->session->evlist, gs->ev.event, &gs->ev.sample);
	if (ret) {
		pr_err("Parse failed fetching guest event");
		return ret;
	}

	/* TSC conversion parameters become available once, lazily */
	if (!gs->have_tc) {
		guest_session__get_tc(gs);
		gs->have_tc = true;
	}

	guest_session__convert_time(gs, gs->ev.sample.time, &gs->ev.sample.time);

	return 0;
}
1330 
1331 static int evlist__append_id_sample(struct evlist *evlist, union perf_event *ev,
1332                     const struct perf_sample *sample)
1333 {
1334     struct evsel *evsel;
1335     void *array;
1336     int ret;
1337 
1338     evsel = evlist__id2evsel(evlist, sample->id);
1339     array = ev;
1340 
1341     if (!evsel) {
1342         pr_err("No evsel for id %"PRIu64"\n", sample->id);
1343         return -EINVAL;
1344     }
1345 
1346     array += ev->header.size;
1347     ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample);
1348     if (ret < 0)
1349         return ret;
1350 
1351     if (ret & 7) {
1352         pr_err("Bad id sample size %d\n", ret);
1353         return -EINVAL;
1354     }
1355 
1356     ev->header.size += ret;
1357 
1358     return 0;
1359 }
1360 
/*
 * Inject guest events with timestamps up to and including @timestamp into
 * the host output. Each fetched event is rewritten in place: the cpumode is
 * changed to its guest equivalent, the guest ID sample is stripped, and a
 * host-compatible ID sample (host ID, host CPU) is appended before the
 * event is written out. Returns 0 at EOF or when the next guest event is
 * newer than @timestamp.
 */
static int guest_session__inject_events(struct guest_session *gs, u64 timestamp)
{
	struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
	int ret;

	/* Nothing to do until host__finished_init() has set up the mapping */
	if (!gs->ready)
		return 0;

	while (1) {
		struct perf_sample *sample;
		struct guest_id *guest_id;
		union perf_event *ev;
		u16 id_hdr_size;
		u8 cpumode;
		u64 id;

		/* gs->ev holds one event; re-fetch only after it is consumed */
		if (!gs->fetched) {
			ret = guest_session__fetch(gs);
			if (ret)
				return ret;
			gs->fetched = true;
		}

		ev = gs->ev.event;
		sample = &gs->ev.sample;

		if (!ev->header.size)
			return 0; /* EOF */

		/* Stop at the first event newer than the requested time */
		if (sample->time > timestamp)
			return 0;

		/* Change cpumode to guest */
		cpumode = ev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
		if (cpumode & PERF_RECORD_MISC_USER)
			cpumode = PERF_RECORD_MISC_GUEST_USER;
		else
			cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
		ev->header.misc &= ~PERF_RECORD_MISC_CPUMODE_MASK;
		ev->header.misc |= cpumode;

		/* Events with zero'd ID samples use the session default ID */
		id = sample->id;
		if (!id) {
			id = gs->dflt_id;
			id_hdr_size = gs->dflt_id_hdr_size;
		} else {
			struct evsel *evsel = evlist__id2evsel(gs->session->evlist, id);

			id_hdr_size = evsel__id_hdr_size(evsel);
		}

		/* Event sizes must stay 8-byte aligned throughout */
		if (id_hdr_size & 7) {
			pr_err("Bad id_hdr_size %u\n", id_hdr_size);
			return -EINVAL;
		}

		if (ev->header.size & 7) {
			pr_err("Bad event size %u\n", ev->header.size);
			return -EINVAL;
		}

		/* Remove guest id sample */
		ev->header.size -= id_hdr_size;

		if (ev->header.size & 7) {
			pr_err("Bad raw event size %u\n", ev->header.size);
			return -EINVAL;
		}

		guest_id = guest_session__lookup_id(gs, id);
		if (!guest_id) {
			pr_err("Guest event with unknown id %llu\n",
			       (unsigned long long)id);
			return -EINVAL;
		}

		/* Change to host ID to avoid conflicting ID values */
		sample->id = guest_id->host_id;
		sample->stream_id = guest_id->host_id;

		if (sample->cpu != (u32)-1) {
			if (sample->cpu >= gs->vcpu_cnt) {
				pr_err("Guest event with unknown VCPU %u\n",
				       sample->cpu);
				return -EINVAL;
			}
			/* Change to host CPU instead of guest VCPU */
			sample->cpu = gs->vcpu[sample->cpu].cpu;
		}

		/* New id sample with new ID and CPU */
		ret = evlist__append_id_sample(inject->session->evlist, ev, sample);
		if (ret)
			return ret;

		if (ev->header.size & 7) {
			pr_err("Bad new event size %u\n", ev->header.size);
			return -EINVAL;
		}

		/* Mark consumed so the next iteration fetches a fresh event */
		gs->fetched = false;

		ret = output_bytes(inject, ev, ev->header.size);
		if (ret)
			return ret;
	}
}
1468 
1469 static int guest_session__flush_events(struct guest_session *gs)
1470 {
1471     return guest_session__inject_events(gs, -1);
1472 }
1473 
1474 static int host__repipe(struct perf_tool *tool,
1475             union perf_event *event,
1476             struct perf_sample *sample,
1477             struct machine *machine)
1478 {
1479     struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1480     int ret;
1481 
1482     ret = guest_session__inject_events(&inject->guest_session, sample->time);
1483     if (ret)
1484         return ret;
1485 
1486     return perf_event__repipe(tool, event, sample, machine);
1487 }
1488 
/*
 * At FINISHED_INIT time the host's initial events have been seen, so the
 * guest session can be wired into the host session: find the QEMU VCPU
 * threads, map guest IDs to newly allocated host IDs, synthesize attrs and
 * an id index for them, add guest build IDs, and start injecting guest
 * events (those with zero timestamps first).
 */
static int host__finished_init(struct perf_session *session, union perf_event *event)
{
	struct perf_inject *inject = container_of(session->tool, struct perf_inject, tool);
	struct guest_session *gs = &inject->guest_session;
	int ret;

	/*
	 * Peek through host COMM events to find QEMU threads and the VCPU they
	 * are running.
	 */
	ret = host_peek_vm_comms(session, gs);
	if (ret)
		return ret;

	if (!gs->vcpu_cnt) {
		pr_err("No VCPU threads found for pid %u\n", gs->machine_pid);
		return -EINVAL;
	}

	/*
	 * Allocate new (unused) host sample IDs and map them to the guest IDs.
	 */
	gs->highest_id = evlist__find_highest_id(session->evlist);
	ret = guest_session__map_ids(gs, session->evlist);
	if (ret)
		return ret;

	ret = guest_session__add_attrs(gs);
	if (ret)
		return ret;

	/* Cover exactly the evsels just added from the guest session */
	ret = synthesize_id_index(inject, gs->session->evlist->core.nr_entries);
	if (ret) {
		pr_err("Failed to synthesize id_index\n");
		return ret;
	}

	ret = guest_session__add_build_ids(gs);
	if (ret) {
		pr_err("Failed to add guest build IDs\n");
		return ret;
	}

	/* From here on, guest_session__inject_events() will do real work */
	gs->ready = true;

	/* Flush guest events that carry a zero timestamp */
	ret = guest_session__inject_events(gs, 0);
	if (ret)
		return ret;

	return perf_event__repipe_op2_synth(session, event);
}
1540 
1541 /*
1542  * Obey finished-round ordering. The FINISHED_ROUND event is first processed
1543  * which flushes host events to file up until the last flush time. Then inject
1544  * guest events up to the same time. Finally write out the FINISHED_ROUND event
1545  * itself.
1546  */
1547 static int host__finished_round(struct perf_tool *tool,
1548                 union perf_event *event,
1549                 struct ordered_events *oe)
1550 {
1551     struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
1552     int ret = perf_event__process_finished_round(tool, event, oe);
1553     u64 timestamp = ordered_events__last_flush_time(oe);
1554 
1555     if (ret)
1556         return ret;
1557 
1558     ret = guest_session__inject_events(&inject->guest_session, timestamp);
1559     if (ret)
1560         return ret;
1561 
1562     return perf_event__repipe_oe_synth(tool, event, oe);
1563 }
1564 
/*
 * Track which host CPU each guest VCPU is running on: when a QEMU VCPU
 * thread of the guest machine switches in, record the sample's CPU against
 * that VCPU. In all cases the event is then repiped like any host event.
 */
static int host__context_switch(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct machine *machine)
{
	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
	struct guest_session *gs = &inject->guest_session;
	u32 pid = event->context_switch.next_prev_pid;
	u32 tid = event->context_switch.next_prev_tid;
	struct guest_tid *guest_tid;
	u32 vcpu;

	/* Only switch-in events of the guest QEMU process are of interest */
	if (out || pid != gs->machine_pid)
		goto out;

	guest_tid = guest_session__lookup_tid(gs, tid);
	if (!guest_tid)
		goto out;

	if (sample->cpu == (u32)-1) {
		pr_err("Switch event does not have CPU\n");
		return -EINVAL;
	}

	vcpu = guest_tid->vcpu;
	if (vcpu >= gs->vcpu_cnt)
		return -EINVAL;

	/* Guest is switching in, record which CPU the VCPU is now running on */
	gs->vcpu[vcpu].cpu = sample->cpu;
out:
	return host__repipe(tool, event, sample, machine);
}
1599 
/* Signal handler: ask the session processing loop to stop gracefully. */
static void sig_handler(int sig __maybe_unused)
{
	session_done = 1;
}
1604 
1605 static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg)
1606 {
1607     struct perf_event_attr *attr = &evsel->core.attr;
1608     const char *name = evsel__name(evsel);
1609 
1610     if (!(attr->sample_type & sample_type)) {
1611         pr_err("Samples for %s event do not have %s attribute set.",
1612             name, sample_msg);
1613         return -EINVAL;
1614     }
1615 
1616     return 0;
1617 }
1618 
/*
 * Sample handler that discards the event. Installed on every evsel by
 * strip_init() so that stripped samples are not repiped to the output.
 */
static int drop_sample(struct perf_tool *tool __maybe_unused,
		       union perf_event *event __maybe_unused,
		       struct perf_sample *sample __maybe_unused,
		       struct evsel *evsel __maybe_unused,
		       struct machine *machine __maybe_unused)
{
	return 0;
}
1627 
1628 static void strip_init(struct perf_inject *inject)
1629 {
1630     struct evlist *evlist = inject->session->evlist;
1631     struct evsel *evsel;
1632 
1633     inject->tool.context_switch = perf_event__drop;
1634 
1635     evlist__for_each_entry(evlist, evsel)
1636         evsel->handler = drop_sample;
1637 }
1638 
1639 static int parse_vm_time_correlation(const struct option *opt, const char *str, int unset)
1640 {
1641     struct perf_inject *inject = opt->value;
1642     const char *args;
1643     char *dry_run;
1644 
1645     if (unset)
1646         return 0;
1647 
1648     inject->itrace_synth_opts.set = true;
1649     inject->itrace_synth_opts.vm_time_correlation = true;
1650     inject->in_place_update = true;
1651 
1652     if (!str)
1653         return 0;
1654 
1655     dry_run = skip_spaces(str);
1656     if (!strncmp(dry_run, "dry-run", strlen("dry-run"))) {
1657         inject->itrace_synth_opts.vm_tm_corr_dry_run = true;
1658         inject->in_place_update_dry_run = true;
1659         args = dry_run + strlen("dry-run");
1660     } else {
1661         args = str;
1662     }
1663 
1664     inject->itrace_synth_opts.vm_tm_corr_args = strdup(args);
1665 
1666     return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM;
1667 }
1668 
/*
 * Parse the --guest-data option: a guest perf.data file name, the guest
 * machine PID, and optionally a guest timestamp offset and a timestamp
 * scale factor, separated by commas.
 *
 * Ownership: @str is strdup'd and carved up with strsep().
 * gs->perf_data_file points at the start of that allocation and is freed in
 * guest_session__exit(); the later tokens point into the same buffer.
 */
static int parse_guest_data(const struct option *opt, const char *str, int unset)
{
	struct perf_inject *inject = opt->value;
	struct guest_session *gs = &inject->guest_session;
	char *tok;
	char *s;

	if (unset)
		return 0;

	if (!str)
		goto bad_args;

	s = strdup(str);
	if (!s)
		return -ENOMEM;

	gs->perf_data_file = strsep(&s, ",");
	if (!gs->perf_data_file)
		goto bad_args;

	/* A kcore_dir in the guest data makes the output a directory */
	gs->copy_kcore_dir = has_kcore_dir(gs->perf_data_file);
	if (gs->copy_kcore_dir)
		inject->output.is_dir = true;

	tok = strsep(&s, ",");
	if (!tok)
		goto bad_args;
	gs->machine_pid = strtoul(tok, NULL, 0);
	if (!inject->guest_session.machine_pid)
		goto bad_args;

	/* Default scale; offset defaults to zero via zero'd gs */
	gs->time_scale = 1;

	tok = strsep(&s, ",");
	if (!tok)
		goto out;
	gs->time_offset = strtoull(tok, NULL, 0);

	tok = strsep(&s, ",");
	if (!tok)
		goto out;
	gs->time_scale = strtod(tok, NULL);
	if (!gs->time_scale)
		goto bad_args;
out:
	return 0;

bad_args:
	pr_err("--guest-data option requires guest perf.data file name, "
	       "guest machine PID, and optionally guest timestamp offset, "
	       "and guest timestamp scale factor, separated by commas.\n");
	return -1;
}
1723 
1724 static int save_section_info_cb(struct perf_file_section *section,
1725                 struct perf_header *ph __maybe_unused,
1726                 int feat, int fd __maybe_unused, void *data)
1727 {
1728     struct perf_inject *inject = data;
1729 
1730     inject->secs[feat] = *section;
1731     return 0;
1732 }
1733 
1734 static int save_section_info(struct perf_inject *inject)
1735 {
1736     struct perf_header *header = &inject->session->header;
1737     int fd = perf_data__fd(inject->session->data);
1738 
1739     return perf_header__process_sections(header, fd, inject, save_section_info_cb);
1740 }
1741 
1742 static bool keep_feat(int feat)
1743 {
1744     switch (feat) {
1745     /* Keep original information that describes the machine or software */
1746     case HEADER_TRACING_DATA:
1747     case HEADER_HOSTNAME:
1748     case HEADER_OSRELEASE:
1749     case HEADER_VERSION:
1750     case HEADER_ARCH:
1751     case HEADER_NRCPUS:
1752     case HEADER_CPUDESC:
1753     case HEADER_CPUID:
1754     case HEADER_TOTAL_MEM:
1755     case HEADER_CPU_TOPOLOGY:
1756     case HEADER_NUMA_TOPOLOGY:
1757     case HEADER_PMU_MAPPINGS:
1758     case HEADER_CACHE:
1759     case HEADER_MEM_TOPOLOGY:
1760     case HEADER_CLOCKID:
1761     case HEADER_BPF_PROG_INFO:
1762     case HEADER_BPF_BTF:
1763     case HEADER_CPU_PMU_CAPS:
1764     case HEADER_CLOCK_DATA:
1765     case HEADER_HYBRID_TOPOLOGY:
1766     case HEADER_PMU_CAPS:
1767         return true;
1768     /* Information that can be updated */
1769     case HEADER_BUILD_ID:
1770     case HEADER_CMDLINE:
1771     case HEADER_EVENT_DESC:
1772     case HEADER_BRANCH_STACK:
1773     case HEADER_GROUP_DESC:
1774     case HEADER_AUXTRACE:
1775     case HEADER_STAT:
1776     case HEADER_SAMPLE_TIME:
1777     case HEADER_DIR_FORMAT:
1778     case HEADER_COMPRESSED:
1779     default:
1780         return false;
1781     };
1782 }
1783 
1784 static int read_file(int fd, u64 offs, void *buf, size_t sz)
1785 {
1786     ssize_t ret = preadn(fd, buf, sz, offs);
1787 
1788     if (ret < 0)
1789         return -errno;
1790     if ((size_t)ret != sz)
1791         return -EINVAL;
1792     return 0;
1793 }
1794 
1795 static int feat_copy(struct perf_inject *inject, int feat, struct feat_writer *fw)
1796 {
1797     int fd = perf_data__fd(inject->session->data);
1798     u64 offs = inject->secs[feat].offset;
1799     size_t sz = inject->secs[feat].size;
1800     void *buf = malloc(sz);
1801     int ret;
1802 
1803     if (!buf)
1804         return -ENOMEM;
1805 
1806     ret = read_file(fd, offs, buf, sz);
1807     if (ret)
1808         goto out_free;
1809 
1810     ret = fw->write(fw, buf, sz);
1811 out_free:
1812     free(buf);
1813     return ret;
1814 }
1815 
/*
 * Adapter tying the generic feature-copier callback interface
 * (struct feat_copier) back to the perf_inject instance that owns the
 * saved feature-section table; see feat_copy_cb().
 */
struct inject_fc {
	struct feat_copier fc;
	struct perf_inject *inject;
};
1820 
1821 static int feat_copy_cb(struct feat_copier *fc, int feat, struct feat_writer *fw)
1822 {
1823     struct inject_fc *inj_fc = container_of(fc, struct inject_fc, fc);
1824     struct perf_inject *inject = inj_fc->inject;
1825     int ret;
1826 
1827     if (!inject->secs[feat].offset ||
1828         !keep_feat(feat))
1829         return 0;
1830 
1831     ret = feat_copy(inject, feat, fw);
1832     if (ret < 0)
1833         return ret;
1834 
1835     return 1; /* Feature section copied */
1836 }
1837 
/*
 * Copy any kcore_dir* subdirectories from the input perf.data directory to
 * the output directory by shelling out to "cp -r -n" (-n: never overwrite).
 *
 * NOTE(review): this goes through system(), so shell metacharacters in the
 * input/output paths (taken from the command line) would be interpreted by
 * the shell — confirm that is acceptable for this tool's threat model.
 */
static int copy_kcore_dir(struct perf_inject *inject)
{
	char *cmd;
	int ret;

	ret = asprintf(&cmd, "cp -r -n %s/kcore_dir* %s >/dev/null 2>&1",
		       inject->input_name, inject->output.path);
	if (ret < 0)
		return ret;
	pr_debug("%s\n", cmd);
	ret = system(cmd);
	free(cmd);
	return ret;
}
1852 
1853 static int guest_session__copy_kcore_dir(struct guest_session *gs)
1854 {
1855     struct perf_inject *inject = container_of(gs, struct perf_inject, guest_session);
1856     char *cmd;
1857     int ret;
1858 
1859     ret = asprintf(&cmd, "cp -r -n %s/kcore_dir %s/kcore_dir__%u >/dev/null 2>&1",
1860                gs->perf_data_file, inject->output.path, gs->machine_pid);
1861     if (ret < 0)
1862         return ret;
1863     pr_debug("%s\n", cmd);
1864     ret = system(cmd);
1865     free(cmd);
1866     return ret;
1867 }
1868 
1869 static int output_fd(struct perf_inject *inject)
1870 {
1871     return inject->in_place_update ? -1 : perf_data__fd(&inject->output);
1872 }
1873 
1874 static int __cmd_inject(struct perf_inject *inject)
1875 {
1876     int ret = -EINVAL;
1877     struct guest_session *gs = &inject->guest_session;
1878     struct perf_session *session = inject->session;
1879     int fd = output_fd(inject);
1880     u64 output_data_offset;
1881 
1882     signal(SIGINT, sig_handler);
1883 
1884     if (inject->build_ids || inject->sched_stat ||
1885         inject->itrace_synth_opts.set || inject->build_id_all) {
1886         inject->tool.mmap     = perf_event__repipe_mmap;
1887         inject->tool.mmap2    = perf_event__repipe_mmap2;
1888         inject->tool.fork     = perf_event__repipe_fork;
1889         inject->tool.tracing_data = perf_event__repipe_tracing_data;
1890     }
1891 
1892     output_data_offset = perf_session__data_offset(session->evlist);
1893 
1894     if (inject->build_id_all) {
1895         inject->tool.mmap     = perf_event__repipe_buildid_mmap;
1896         inject->tool.mmap2    = perf_event__repipe_buildid_mmap2;
1897     } else if (inject->build_ids) {
1898         inject->tool.sample = perf_event__inject_buildid;
1899     } else if (inject->sched_stat) {
1900         struct evsel *evsel;
1901 
1902         evlist__for_each_entry(session->evlist, evsel) {
1903             const char *name = evsel__name(evsel);
1904 
1905             if (!strcmp(name, "sched:sched_switch")) {
1906                 if (evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
1907                     return -EINVAL;
1908 
1909                 evsel->handler = perf_inject__sched_switch;
1910             } else if (!strcmp(name, "sched:sched_process_exit"))
1911                 evsel->handler = perf_inject__sched_process_exit;
1912             else if (!strncmp(name, "sched:sched_stat_", 17))
1913                 evsel->handler = perf_inject__sched_stat;
1914         }
1915     } else if (inject->itrace_synth_opts.vm_time_correlation) {
1916         session->itrace_synth_opts = &inject->itrace_synth_opts;
1917         memset(&inject->tool, 0, sizeof(inject->tool));
1918         inject->tool.id_index       = perf_event__process_id_index;
1919         inject->tool.auxtrace_info  = perf_event__process_auxtrace_info;
1920         inject->tool.auxtrace       = perf_event__process_auxtrace;
1921         inject->tool.auxtrace_error = perf_event__process_auxtrace_error;
1922         inject->tool.ordered_events = true;
1923         inject->tool.ordering_requires_timestamps = true;
1924     } else if (inject->itrace_synth_opts.set) {
1925         session->itrace_synth_opts = &inject->itrace_synth_opts;
1926         inject->itrace_synth_opts.inject = true;
1927         inject->tool.comm       = perf_event__repipe_comm;
1928         inject->tool.namespaces     = perf_event__repipe_namespaces;
1929         inject->tool.exit       = perf_event__repipe_exit;
1930         inject->tool.id_index       = perf_event__process_id_index;
1931         inject->tool.auxtrace_info  = perf_event__process_auxtrace_info;
1932         inject->tool.auxtrace       = perf_event__process_auxtrace;
1933         inject->tool.aux        = perf_event__drop_aux;
1934         inject->tool.itrace_start   = perf_event__drop_aux;
1935         inject->tool.aux_output_hw_id = perf_event__drop_aux;
1936         inject->tool.ordered_events = true;
1937         inject->tool.ordering_requires_timestamps = true;
1938         /* Allow space in the header for new attributes */
1939         output_data_offset = roundup(8192 + session->header.data_offset, 4096);
1940         if (inject->strip)
1941             strip_init(inject);
1942     } else if (gs->perf_data_file) {
1943         char *name = gs->perf_data_file;
1944 
1945         /*
1946          * Not strictly necessary, but keep these events in order wrt
1947          * guest events.
1948          */
1949         inject->tool.mmap       = host__repipe;
1950         inject->tool.mmap2      = host__repipe;
1951         inject->tool.comm       = host__repipe;
1952         inject->tool.fork       = host__repipe;
1953         inject->tool.exit       = host__repipe;
1954         inject->tool.lost       = host__repipe;
1955         inject->tool.context_switch = host__repipe;
1956         inject->tool.ksymbol        = host__repipe;
1957         inject->tool.text_poke      = host__repipe;
1958         /*
1959          * Once the host session has initialized, set up sample ID
1960          * mapping and feed in guest attrs, build IDs and initial
1961          * events.
1962          */
1963         inject->tool.finished_init  = host__finished_init;
1964         /* Obey finished round ordering */
1965         inject->tool.finished_round = host__finished_round,
1966         /* Keep track of which CPU a VCPU is runnng on */
1967         inject->tool.context_switch = host__context_switch;
1968         /*
1969          * Must order events to be able to obey finished round
1970          * ordering.
1971          */
1972         inject->tool.ordered_events = true;
1973         inject->tool.ordering_requires_timestamps = true;
1974         /* Set up a separate session to process guest perf.data file */
1975         ret = guest_session__start(gs, name, session->data->force);
1976         if (ret) {
1977             pr_err("Failed to process %s, error %d\n", name, ret);
1978             return ret;
1979         }
1980         /* Allow space in the header for guest attributes */
1981         output_data_offset += gs->session->header.data_offset;
1982         output_data_offset = roundup(output_data_offset, 4096);
1983     }
1984 
1985     if (!inject->itrace_synth_opts.set)
1986         auxtrace_index__free(&session->auxtrace_index);
1987 
1988     if (!inject->is_pipe && !inject->in_place_update)
1989         lseek(fd, output_data_offset, SEEK_SET);
1990 
1991     ret = perf_session__process_events(session);
1992     if (ret)
1993         return ret;
1994 
1995     if (gs->session) {
1996         /*
1997          * Remaining guest events have later timestamps. Flush them
1998          * out to file.
1999          */
2000         ret = guest_session__flush_events(gs);
2001         if (ret) {
2002             pr_err("Failed to flush guest events\n");
2003             return ret;
2004         }
2005     }
2006 
2007     if (!inject->is_pipe && !inject->in_place_update) {
2008         struct inject_fc inj_fc = {
2009             .fc.copy = feat_copy_cb,
2010             .inject = inject,
2011         };
2012 
2013         if (inject->build_ids)
2014             perf_header__set_feat(&session->header,
2015                           HEADER_BUILD_ID);
2016         /*
2017          * Keep all buildids when there is unprocessed AUX data because
2018          * it is not known which ones the AUX trace hits.
2019          */
2020         if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) &&
2021             inject->have_auxtrace && !inject->itrace_synth_opts.set)
2022             dsos__hit_all(session);
2023         /*
2024          * The AUX areas have been removed and replaced with
2025          * synthesized hardware events, so clear the feature flag.
2026          */
2027         if (inject->itrace_synth_opts.set) {
2028             perf_header__clear_feat(&session->header,
2029                         HEADER_AUXTRACE);
2030             if (inject->itrace_synth_opts.last_branch ||
2031                 inject->itrace_synth_opts.add_last_branch)
2032                 perf_header__set_feat(&session->header,
2033                               HEADER_BRANCH_STACK);
2034         }
2035         session->header.data_offset = output_data_offset;
2036         session->header.data_size = inject->bytes_written;
2037         perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc);
2038 
2039         if (inject->copy_kcore_dir) {
2040             ret = copy_kcore_dir(inject);
2041             if (ret) {
2042                 pr_err("Failed to copy kcore\n");
2043                 return ret;
2044             }
2045         }
2046         if (gs->copy_kcore_dir) {
2047             ret = guest_session__copy_kcore_dir(gs);
2048             if (ret) {
2049                 pr_err("Failed to copy guest kcore\n");
2050                 return ret;
2051             }
2052         }
2053     }
2054 
2055     return ret;
2056 }
2057 
/*
 * Entry point for 'perf inject'. Parses command-line options, validates
 * option combinations, opens the input/output perf.data streams, creates
 * the session and delegates event processing to __cmd_inject().
 *
 * Returns 0 on success, negative error code or -1 on failure.
 */
int cmd_inject(int argc, const char **argv)
{
    struct perf_inject inject = {
        /*
         * Default tool: pass every event through unchanged. Specific
         * injection modes override individual handlers later.
         */
        .tool = {
            .sample     = perf_event__repipe_sample,
            .read       = perf_event__repipe_sample,
            .mmap       = perf_event__repipe,
            .mmap2      = perf_event__repipe,
            .comm       = perf_event__repipe,
            .namespaces = perf_event__repipe,
            .cgroup     = perf_event__repipe,
            .fork       = perf_event__repipe,
            .exit       = perf_event__repipe,
            .lost       = perf_event__repipe,
            .lost_samples   = perf_event__repipe,
            .aux        = perf_event__repipe,
            .itrace_start   = perf_event__repipe,
            .aux_output_hw_id = perf_event__repipe,
            .context_switch = perf_event__repipe,
            .throttle   = perf_event__repipe,
            .unthrottle = perf_event__repipe,
            .ksymbol    = perf_event__repipe,
            .bpf        = perf_event__repipe,
            .text_poke  = perf_event__repipe,
            .attr       = perf_event__repipe_attr,
            .event_update   = perf_event__repipe_event_update,
            .tracing_data   = perf_event__repipe_op2_synth,
            .finished_round = perf_event__repipe_oe_synth,
            .build_id   = perf_event__repipe_op2_synth,
            .id_index   = perf_event__repipe_op2_synth,
            .auxtrace_info  = perf_event__repipe_op2_synth,
            .auxtrace_error = perf_event__repipe_op2_synth,
            .time_conv  = perf_event__repipe_op2_synth,
            .thread_map = perf_event__repipe_op2_synth,
            .cpu_map    = perf_event__repipe_op2_synth,
            .stat_config    = perf_event__repipe_op2_synth,
            .stat       = perf_event__repipe_op2_synth,
            .stat_round = perf_event__repipe_op2_synth,
            .feature    = perf_event__repipe_op2_synth,
            .finished_init  = perf_event__repipe_op2_synth,
            .compressed = perf_event__repipe_op4_synth,
            .auxtrace   = perf_event__repipe_auxtrace,
        },
        /* "-" means stdin/stdout by default */
        .input_name  = "-",
        .samples = LIST_HEAD_INIT(inject.samples),
        .output = {
            .path = "-",
            .mode = PERF_DATA_MODE_WRITE,
            .use_stdio = true,
        },
    };
    struct perf_data data = {
        .mode = PERF_DATA_MODE_READ,
        .use_stdio = true,
    };
    int ret;
    /* Whether to forward the input header to the output verbatim */
    bool repipe = true;

    struct option options[] = {
        OPT_BOOLEAN('b', "build-ids", &inject.build_ids,
                "Inject build-ids into the output stream"),
        OPT_BOOLEAN(0, "buildid-all", &inject.build_id_all,
                "Inject build-ids of all DSOs into the output stream"),
        OPT_STRING('i', "input", &inject.input_name, "file",
               "input file name"),
        OPT_STRING('o', "output", &inject.output.path, "file",
               "output file name"),
        OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
                "Merge sched-stat and sched-switch for getting events "
                "where and how long tasks slept"),
#ifdef HAVE_JITDUMP
        OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
#endif
        OPT_INCR('v', "verbose", &verbose,
             "be more verbose (show build ids, etc)"),
        OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
               "file", "vmlinux pathname"),
        OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
                "don't load vmlinux even if found"),
        OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
               "kallsyms pathname"),
        OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
        OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts,
                    NULL, "opts", "Instruction Tracing options\n"
                    ITRACE_HELP,
                    itrace_parse_synth_opts),
        OPT_BOOLEAN(0, "strip", &inject.strip,
                "strip non-synthesized events (use with --itrace)"),
        OPT_CALLBACK_OPTARG(0, "vm-time-correlation", &inject, NULL, "opts",
                    "correlate time between VM guests and the host",
                    parse_vm_time_correlation),
        OPT_CALLBACK_OPTARG(0, "guest-data", &inject, NULL, "opts",
                    "inject events from a guest perf.data file",
                    parse_guest_data),
        OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
               "guest mount directory under which every guest os"
               " instance has a subdir"),
        OPT_END()
    };
    const char * const inject_usage[] = {
        "perf inject [<options>]",
        NULL
    };
#ifndef HAVE_JITDUMP
    /* Keep -j visible in --help but report why it is unavailable */
    set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
#endif
    argc = parse_options(argc, argv, options, inject_usage, 0);

    /*
     * Any (unrecognized) arguments left?
     */
    if (argc)
        usage_with_options(inject_usage, options);

    /* --strip only makes sense together with itrace synthesis */
    if (inject.strip && !inject.itrace_synth_opts.set) {
        pr_err("--strip option requires --itrace option\n");
        return -1;
    }

    if (symbol__validate_sym_arguments())
        return -1;

    if (inject.in_place_update) {
        /* In-place update rewrites the input file itself, so it needs
         * a regular input file, no separate output, and --force. */
        if (!strcmp(inject.input_name, "-")) {
            pr_err("Input file name required for in-place updating\n");
            return -1;
        }
        if (strcmp(inject.output.path, "-")) {
            pr_err("Output file name must not be specified for in-place updating\n");
            return -1;
        }
        if (!data.force && !inject.in_place_update_dry_run) {
            pr_err("The input file would be updated in place, "
                "the --force option is required.\n");
            return -1;
        }
        if (!inject.in_place_update_dry_run)
            data.in_place_update = true;
    } else {
        /* Carry the kcore_dir over to directory-style output when present */
        if (strcmp(inject.output.path, "-") && !inject.strip &&
            has_kcore_dir(inject.input_name)) {
            inject.output.is_dir = true;
            inject.copy_kcore_dir = true;
        }
        if (perf_data__open(&inject.output)) {
            perror("failed to create output file");
            return -1;
        }
    }

    data.path = inject.input_name;
    if (!strcmp(inject.input_name, "-") || inject.output.is_pipe) {
        inject.is_pipe = true;
        /*
         * Do not repipe header when input is a regular file
         * since either it can rewrite the header at the end
         * or write a new pipe header.
         */
        if (strcmp(inject.input_name, "-"))
            repipe = false;
    }

    inject.session = __perf_session__new(&data, repipe,
                         output_fd(&inject),
                         &inject.tool);
    if (IS_ERR(inject.session)) {
        ret = PTR_ERR(inject.session);
        goto out_close_output;
    }

    /* Non-fatal: compressed records will just fail to decompress later */
    if (zstd_init(&(inject.session->zstd_data), 0) < 0)
        pr_warning("Decompression initialization failed.\n");

    /* Save original section info before feature bits change */
    ret = save_section_info(&inject);
    if (ret)
        goto out_delete;

    /* File input but pipe output: synthesize a fresh pipe-mode header */
    if (!data.is_pipe && inject.output.is_pipe) {
        ret = perf_header__write_pipe(perf_data__fd(&inject.output));
        if (ret < 0) {
            pr_err("Couldn't write a new pipe header.\n");
            goto out_delete;
        }

        ret = perf_event__synthesize_for_pipe(&inject.tool,
                              inject.session,
                              &inject.output,
                              perf_event__repipe);
        if (ret < 0)
            goto out_delete;
    }

    if (inject.build_ids && !inject.build_id_all) {
        /*
         * to make sure the mmap records are ordered correctly
         * and so that the correct especially due to jitted code
         * mmaps. We cannot generate the buildid hit list and
         * inject the jit mmaps at the same time for now.
         */
        inject.tool.ordered_events = true;
        inject.tool.ordering_requires_timestamps = true;
    }

    if (inject.sched_stat) {
        inject.tool.ordered_events = true;
    }

#ifdef HAVE_JITDUMP
    if (inject.jit_mode) {
        inject.tool.mmap2      = perf_event__jit_repipe_mmap2;
        inject.tool.mmap       = perf_event__jit_repipe_mmap;
        inject.tool.ordered_events = true;
        inject.tool.ordering_requires_timestamps = true;
        /*
         * JIT MMAP injection injects all MMAP events in one go, so it
         * does not obey finished_round semantics.
         */
        inject.tool.finished_round = perf_event__drop_oe;
    }
#endif
    ret = symbol__init(&inject.session->header.env);
    if (ret < 0)
        goto out_delete;

    ret = __cmd_inject(&inject);

    guest_session__exit(&inject.guest_session);

out_delete:
    zstd_fini(&(inject.session->zstd_data));
    perf_session__delete(inject.session);
out_close_output:
    /* In-place update never opened a separate output file */
    if (!inject.in_place_update)
        perf_data__close(&inject.output);
    free(inject.itrace_synth_opts.vm_tm_corr_args);
    return ret;
}