0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
0004  *
0005  * Parts came from builtin-{top,stat,record}.c, see those files for further
0006  * copyright notes.
0007  */
0008 
0009 #include <byteswap.h>
0010 #include <errno.h>
0011 #include <inttypes.h>
0012 #include <linux/bitops.h>
0013 #include <api/fs/fs.h>
0014 #include <api/fs/tracing_path.h>
0015 #include <traceevent/event-parse.h>
0016 #include <linux/hw_breakpoint.h>
0017 #include <linux/perf_event.h>
0018 #include <linux/compiler.h>
0019 #include <linux/err.h>
0020 #include <linux/zalloc.h>
0021 #include <sys/ioctl.h>
0022 #include <sys/resource.h>
0023 #include <sys/types.h>
0024 #include <dirent.h>
0025 #include <stdlib.h>
0026 #include <perf/evsel.h>
0027 #include "asm/bug.h"
0028 #include "bpf_counter.h"
0029 #include "callchain.h"
0030 #include "cgroup.h"
0031 #include "counts.h"
0032 #include "event.h"
0033 #include "evsel.h"
0034 #include "util/env.h"
0035 #include "util/evsel_config.h"
0036 #include "util/evsel_fprintf.h"
0037 #include "evlist.h"
0038 #include <perf/cpumap.h>
0039 #include "thread_map.h"
0040 #include "target.h"
0041 #include "perf_regs.h"
0042 #include "record.h"
0043 #include "debug.h"
0044 #include "trace-event.h"
0045 #include "stat.h"
0046 #include "string2.h"
0047 #include "memswap.h"
0048 #include "util.h"
0049 #include "hashmap.h"
0050 #include "pmu-hybrid.h"
0051 #include "off_cpu.h"
0052 #include "../perf-sys.h"
0053 #include "util/parse-branch-options.h"
0054 #include <internal/xyarray.h>
0055 #include <internal/lib.h>
0056 
0057 #include <linux/ctype.h>
0058 
0059 struct perf_missing_features perf_missing_features;
0060 
0061 static clockid_t clockid;
0062 
0063 static const char *const perf_tool_event__tool_names[PERF_TOOL_MAX] = {
0064     NULL,
0065     "duration_time",
0066     "user_time",
0067     "system_time",
0068 };
0069 
0070 const char *perf_tool_event__to_str(enum perf_tool_event ev)
0071 {
0072     if (ev > PERF_TOOL_NONE && ev < PERF_TOOL_MAX)
0073         return perf_tool_event__tool_names[ev];
0074 
0075     return NULL;
0076 }
0077 
0078 enum perf_tool_event perf_tool_event__from_str(const char *str)
0079 {
0080     int i;
0081 
0082     perf_tool_event__for_each_event(i) {
0083         if (!strcmp(str, perf_tool_event__tool_names[i]))
0084             return i;
0085     }
0086     return PERF_TOOL_NONE;
0087 }
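/*
 * Example: a minimal sketch of how the two helpers above round-trip,
 * assuming the enum perf_tool_event values declared in util/evsel.h:
 *
 *	enum perf_tool_event ev = perf_tool_event__from_str("duration_time");
 *	const char *name = perf_tool_event__to_str(ev);	// "duration_time"
 *
 * Unknown strings map to PERF_TOOL_NONE, whose name slot is NULL, so
 * perf_tool_event__to_str(PERF_TOOL_NONE) returns NULL rather than a string.
 */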
0088 
0089 
0090 static int evsel__no_extra_init(struct evsel *evsel __maybe_unused)
0091 {
0092     return 0;
0093 }
0094 
0095 void __weak test_attr__ready(void) { }
0096 
0097 static void evsel__no_extra_fini(struct evsel *evsel __maybe_unused)
0098 {
0099 }
0100 
0101 static struct {
0102     size_t  size;
0103     int (*init)(struct evsel *evsel);
0104     void    (*fini)(struct evsel *evsel);
0105 } perf_evsel__object = {
0106     .size = sizeof(struct evsel),
0107     .init = evsel__no_extra_init,
0108     .fini = evsel__no_extra_fini,
0109 };
0110 
0111 int evsel__object_config(size_t object_size, int (*init)(struct evsel *evsel),
0112              void (*fini)(struct evsel *evsel))
0113 {
0114 
0115     if (object_size == 0)
0116         goto set_methods;
0117 
0118     if (perf_evsel__object.size > object_size)
0119         return -EINVAL;
0120 
0121     perf_evsel__object.size = object_size;
0122 
0123 set_methods:
0124     if (init != NULL)
0125         perf_evsel__object.init = init;
0126 
0127     if (fini != NULL)
0128         perf_evsel__object.fini = fini;
0129 
0130     return 0;
0131 }
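/*
 * Example: a minimal sketch of how a tool can embed 'struct evsel' in a
 * larger per-event object and register its own hooks with the helper
 * above.  The wrapper type and callbacks are hypothetical; only
 * evsel__object_config() itself comes from this file:
 *
 *	struct my_evsel {
 *		struct evsel	evsel;	// must stay the first member
 *		int		my_state;
 *	};
 *
 *	static int my_evsel_init(struct evsel *evsel)
 *	{
 *		container_of(evsel, struct my_evsel, evsel)->my_state = 0;
 *		return 0;
 *	}
 *
 *	static void my_evsel_fini(struct evsel *evsel) { }
 *
 * Registering once at start-up, before any evsel is allocated, makes
 * evsel__new() hand out the larger object:
 *
 *	evsel__object_config(sizeof(struct my_evsel),
 *			     my_evsel_init, my_evsel_fini);
 */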
0132 
0133 #define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
0134 
0135 int __evsel__sample_size(u64 sample_type)
0136 {
0137     u64 mask = sample_type & PERF_SAMPLE_MASK;
0138     int size = 0;
0139     int i;
0140 
0141     for (i = 0; i < 64; i++) {
0142         if (mask & (1ULL << i))
0143             size++;
0144     }
0145 
0146     size *= sizeof(u64);
0147 
0148     return size;
0149 }
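/*
 * Worked example, assuming PERF_SAMPLE_MASK covers the basic sample bits:
 * sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME sets
 * three bits inside the mask, so __evsel__sample_size() returns
 * 3 * sizeof(u64) = 24 bytes for the fixed part of such a sample.
 */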
0150 
0151 /**
0152  * __perf_evsel__calc_id_pos - calculate id_pos.
0153  * @sample_type: sample type
0154  *
0155  * This function returns the position of the event id (PERF_SAMPLE_ID or
0156  * PERF_SAMPLE_IDENTIFIER) in a sample event i.e. in the array of struct
0157  * perf_record_sample.
0158  */
0159 static int __perf_evsel__calc_id_pos(u64 sample_type)
0160 {
0161     int idx = 0;
0162 
0163     if (sample_type & PERF_SAMPLE_IDENTIFIER)
0164         return 0;
0165 
0166     if (!(sample_type & PERF_SAMPLE_ID))
0167         return -1;
0168 
0169     if (sample_type & PERF_SAMPLE_IP)
0170         idx += 1;
0171 
0172     if (sample_type & PERF_SAMPLE_TID)
0173         idx += 1;
0174 
0175     if (sample_type & PERF_SAMPLE_TIME)
0176         idx += 1;
0177 
0178     if (sample_type & PERF_SAMPLE_ADDR)
0179         idx += 1;
0180 
0181     return idx;
0182 }
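/*
 * Worked example: with sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID |
 * PERF_SAMPLE_TIME | PERF_SAMPLE_ID (and no PERF_SAMPLE_IDENTIFIER), the
 * id follows the ip, tid and time words, so __perf_evsel__calc_id_pos()
 * returns 3, i.e. the fourth u64 of the sample body.
 */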
0183 
0184 /**
0185  * __perf_evsel__calc_is_pos - calculate is_pos.
0186  * @sample_type: sample type
0187  *
0188  * This function returns the position (counting backwards) of the event id
0189  * (PERF_SAMPLE_ID or PERF_SAMPLE_IDENTIFIER) in a non-sample event, i.e. when
0190  * sample_id_all is used an id sample is appended to non-sample events.
0191  */
0192 static int __perf_evsel__calc_is_pos(u64 sample_type)
0193 {
0194     int idx = 1;
0195 
0196     if (sample_type & PERF_SAMPLE_IDENTIFIER)
0197         return 1;
0198 
0199     if (!(sample_type & PERF_SAMPLE_ID))
0200         return -1;
0201 
0202     if (sample_type & PERF_SAMPLE_CPU)
0203         idx += 1;
0204 
0205     if (sample_type & PERF_SAMPLE_STREAM_ID)
0206         idx += 1;
0207 
0208     return idx;
0209 }
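/*
 * Worked example: with sample_id_all and sample_type = PERF_SAMPLE_TID |
 * PERF_SAMPLE_TIME | PERF_SAMPLE_ID | PERF_SAMPLE_CPU, the id sample
 * appended to non-sample events ends with the id word followed by the
 * cpu word, so counting backwards from the end the id sits at position
 * 2, which is what __perf_evsel__calc_is_pos() returns: the initial 1
 * plus 1 for PERF_SAMPLE_CPU.
 */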
0210 
0211 void evsel__calc_id_pos(struct evsel *evsel)
0212 {
0213     evsel->id_pos = __perf_evsel__calc_id_pos(evsel->core.attr.sample_type);
0214     evsel->is_pos = __perf_evsel__calc_is_pos(evsel->core.attr.sample_type);
0215 }
0216 
0217 void __evsel__set_sample_bit(struct evsel *evsel,
0218                   enum perf_event_sample_format bit)
0219 {
0220     if (!(evsel->core.attr.sample_type & bit)) {
0221         evsel->core.attr.sample_type |= bit;
0222         evsel->sample_size += sizeof(u64);
0223         evsel__calc_id_pos(evsel);
0224     }
0225 }
0226 
0227 void __evsel__reset_sample_bit(struct evsel *evsel,
0228                     enum perf_event_sample_format bit)
0229 {
0230     if (evsel->core.attr.sample_type & bit) {
0231         evsel->core.attr.sample_type &= ~bit;
0232         evsel->sample_size -= sizeof(u64);
0233         evsel__calc_id_pos(evsel);
0234     }
0235 }
0236 
0237 void evsel__set_sample_id(struct evsel *evsel,
0238                    bool can_sample_identifier)
0239 {
0240     if (can_sample_identifier) {
0241         evsel__reset_sample_bit(evsel, ID);
0242         evsel__set_sample_bit(evsel, IDENTIFIER);
0243     } else {
0244         evsel__set_sample_bit(evsel, ID);
0245     }
0246     evsel->core.attr.read_format |= PERF_FORMAT_ID;
0247 }
0248 
0249 /**
0250  * evsel__is_function_event - Return whether given evsel is a function
0251  * trace event
0252  *
0253  * @evsel - evsel selector to be tested
0254  *
0255  * Return %true if event is function trace event
0256  */
0257 bool evsel__is_function_event(struct evsel *evsel)
0258 {
0259 #define FUNCTION_EVENT "ftrace:function"
0260 
0261     return evsel->name &&
0262            !strncmp(FUNCTION_EVENT, evsel->name, sizeof(FUNCTION_EVENT));
0263 
0264 #undef FUNCTION_EVENT
0265 }
0266 
0267 void evsel__init(struct evsel *evsel,
0268          struct perf_event_attr *attr, int idx)
0269 {
0270     perf_evsel__init(&evsel->core, attr, idx);
0271     evsel->tracking    = !idx;
0272     evsel->unit    = strdup("");
0273     evsel->scale       = 1.0;
0274     evsel->max_events  = ULONG_MAX;
0275     evsel->evlist      = NULL;
0276     evsel->bpf_obj     = NULL;
0277     evsel->bpf_fd      = -1;
0278     INIT_LIST_HEAD(&evsel->config_terms);
0279     INIT_LIST_HEAD(&evsel->bpf_counter_list);
0280     perf_evsel__object.init(evsel);
0281     evsel->sample_size = __evsel__sample_size(attr->sample_type);
0282     evsel__calc_id_pos(evsel);
0283     evsel->cmdline_group_boundary = false;
0284     evsel->metric_expr   = NULL;
0285     evsel->metric_name   = NULL;
0286     evsel->metric_events = NULL;
0287     evsel->per_pkg_mask  = NULL;
0288     evsel->collect_stat  = false;
0289     evsel->pmu_name      = NULL;
0290 }
0291 
0292 struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx)
0293 {
0294     struct evsel *evsel = zalloc(perf_evsel__object.size);
0295 
0296     if (!evsel)
0297         return NULL;
0298     evsel__init(evsel, attr, idx);
0299 
0300     if (evsel__is_bpf_output(evsel) && !attr->sample_type) {
0301         evsel->core.attr.sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
0302                         PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
0303         evsel->core.attr.sample_period = 1;
0304     }
0305 
0306     if (evsel__is_clock(evsel)) {
0307         free((char *)evsel->unit);
0308         evsel->unit = strdup("msec");
0309         evsel->scale = 1e-6;
0310     }
0311 
0312     return evsel;
0313 }
0314 
0315 static bool perf_event_can_profile_kernel(void)
0316 {
0317     return perf_event_paranoid_check(1);
0318 }
0319 
0320 struct evsel *evsel__new_cycles(bool precise __maybe_unused, __u32 type, __u64 config)
0321 {
0322     struct perf_event_attr attr = {
0323         .type   = type,
0324         .config = config,
0325         .exclude_kernel = !perf_event_can_profile_kernel(),
0326     };
0327     struct evsel *evsel;
0328 
0329     event_attr_init(&attr);
0330 
0331     /*
0332      * Now let the usual logic that sets up the perf_event_attr defaults
0333      * kick in when we return, before perf_evsel__open() is called.
0334      */
0335     evsel = evsel__new(&attr);
0336     if (evsel == NULL)
0337         goto out;
0338 
0339     arch_evsel__fixup_new_cycles(&evsel->core.attr);
0340 
0341     evsel->precise_max = true;
0342 
0343     /* use asprintf() because free(evsel) assumes name is allocated */
0344     if (asprintf(&evsel->name, "cycles%s%s%.*s",
0345              (attr.precise_ip || attr.exclude_kernel) ? ":" : "",
0346              attr.exclude_kernel ? "u" : "",
0347              attr.precise_ip ? attr.precise_ip + 1 : 0, "ppp") < 0)
0348         goto error_free;
0349 out:
0350     return evsel;
0351 error_free:
0352     evsel__delete(evsel);
0353     evsel = NULL;
0354     goto out;
0355 }
0356 
0357 int copy_config_terms(struct list_head *dst, struct list_head *src)
0358 {
0359     struct evsel_config_term *pos, *tmp;
0360 
0361     list_for_each_entry(pos, src, list) {
0362         tmp = malloc(sizeof(*tmp));
0363         if (tmp == NULL)
0364             return -ENOMEM;
0365 
0366         *tmp = *pos;
0367         if (tmp->free_str) {
0368             tmp->val.str = strdup(pos->val.str);
0369             if (tmp->val.str == NULL) {
0370                 free(tmp);
0371                 return -ENOMEM;
0372             }
0373         }
0374         list_add_tail(&tmp->list, dst);
0375     }
0376     return 0;
0377 }
0378 
0379 static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src)
0380 {
0381     return copy_config_terms(&dst->config_terms, &src->config_terms);
0382 }
0383 
0384 /**
0385  * evsel__clone - create a new evsel copied from @orig
0386  * @orig: original evsel
0387  *
0388  * The assumption is that @orig is neither configured nor opened yet,
0389  * so we only care about the attributes that can be set while it's parsed.
0390  */
0391 struct evsel *evsel__clone(struct evsel *orig)
0392 {
0393     struct evsel *evsel;
0394 
0395     BUG_ON(orig->core.fd);
0396     BUG_ON(orig->counts);
0397     BUG_ON(orig->priv);
0398     BUG_ON(orig->per_pkg_mask);
0399 
0400     /* cannot handle BPF objects for now */
0401     if (orig->bpf_obj)
0402         return NULL;
0403 
0404     evsel = evsel__new(&orig->core.attr);
0405     if (evsel == NULL)
0406         return NULL;
0407 
0408     evsel->core.cpus = perf_cpu_map__get(orig->core.cpus);
0409     evsel->core.own_cpus = perf_cpu_map__get(orig->core.own_cpus);
0410     evsel->core.threads = perf_thread_map__get(orig->core.threads);
0411     evsel->core.nr_members = orig->core.nr_members;
0412     evsel->core.system_wide = orig->core.system_wide;
0413     evsel->core.requires_cpu = orig->core.requires_cpu;
0414 
0415     if (orig->name) {
0416         evsel->name = strdup(orig->name);
0417         if (evsel->name == NULL)
0418             goto out_err;
0419     }
0420     if (orig->group_name) {
0421         evsel->group_name = strdup(orig->group_name);
0422         if (evsel->group_name == NULL)
0423             goto out_err;
0424     }
0425     if (orig->pmu_name) {
0426         evsel->pmu_name = strdup(orig->pmu_name);
0427         if (evsel->pmu_name == NULL)
0428             goto out_err;
0429     }
0430     if (orig->filter) {
0431         evsel->filter = strdup(orig->filter);
0432         if (evsel->filter == NULL)
0433             goto out_err;
0434     }
0435     if (orig->metric_id) {
0436         evsel->metric_id = strdup(orig->metric_id);
0437         if (evsel->metric_id == NULL)
0438             goto out_err;
0439     }
0440     evsel->cgrp = cgroup__get(orig->cgrp);
0441     evsel->tp_format = orig->tp_format;
0442     evsel->handler = orig->handler;
0443     evsel->core.leader = orig->core.leader;
0444 
0445     evsel->max_events = orig->max_events;
0446     evsel->tool_event = orig->tool_event;
0447     free((char *)evsel->unit);
0448     evsel->unit = strdup(orig->unit);
0449     if (evsel->unit == NULL)
0450         goto out_err;
0451 
0452     evsel->scale = orig->scale;
0453     evsel->snapshot = orig->snapshot;
0454     evsel->per_pkg = orig->per_pkg;
0455     evsel->percore = orig->percore;
0456     evsel->precise_max = orig->precise_max;
0457     evsel->use_uncore_alias = orig->use_uncore_alias;
0458     evsel->is_libpfm_event = orig->is_libpfm_event;
0459 
0460     evsel->exclude_GH = orig->exclude_GH;
0461     evsel->sample_read = orig->sample_read;
0462     evsel->auto_merge_stats = orig->auto_merge_stats;
0463     evsel->collect_stat = orig->collect_stat;
0464     evsel->weak_group = orig->weak_group;
0465     evsel->use_config_name = orig->use_config_name;
0466 
0467     if (evsel__copy_config_terms(evsel, orig) < 0)
0468         goto out_err;
0469 
0470     return evsel;
0471 
0472 out_err:
0473     evsel__delete(evsel);
0474     return NULL;
0475 }
0476 
0477 /*
0478  * Returns pointer with encoded error via <linux/err.h> interface.
0479  */
0480 struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx)
0481 {
0482     struct evsel *evsel = zalloc(perf_evsel__object.size);
0483     int err = -ENOMEM;
0484 
0485     if (evsel == NULL) {
0486         goto out_err;
0487     } else {
0488         struct perf_event_attr attr = {
0489             .type          = PERF_TYPE_TRACEPOINT,
0490             .sample_type   = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
0491                       PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
0492         };
0493 
0494         if (asprintf(&evsel->name, "%s:%s", sys, name) < 0)
0495             goto out_free;
0496 
0497         evsel->tp_format = trace_event__tp_format(sys, name);
0498         if (IS_ERR(evsel->tp_format)) {
0499             err = PTR_ERR(evsel->tp_format);
0500             goto out_free;
0501         }
0502 
0503         event_attr_init(&attr);
0504         attr.config = evsel->tp_format->id;
0505         attr.sample_period = 1;
0506         evsel__init(evsel, &attr, idx);
0507     }
0508 
0509     return evsel;
0510 
0511 out_free:
0512     zfree(&evsel->name);
0513     free(evsel);
0514 out_err:
0515     return ERR_PTR(err);
0516 }
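/*
 * Example: a minimal sketch of using the helper above, assuming
 * evsel__newtp() is the idx == 0 wrapper declared in evsel.h.  Because
 * the error is encoded in the pointer, it must be checked with
 * IS_ERR()/PTR_ERR() rather than compared against NULL:
 *
 *	struct evsel *evsel = evsel__newtp("sched", "sched_switch");
 *
 *	if (IS_ERR(evsel))
 *		return PTR_ERR(evsel);	// e.g. -ENOMEM or a tracefs error
 */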
0517 
0518 const char *const evsel__hw_names[PERF_COUNT_HW_MAX] = {
0519     "cycles",
0520     "instructions",
0521     "cache-references",
0522     "cache-misses",
0523     "branches",
0524     "branch-misses",
0525     "bus-cycles",
0526     "stalled-cycles-frontend",
0527     "stalled-cycles-backend",
0528     "ref-cycles",
0529 };
0530 
0531 char *evsel__bpf_counter_events;
0532 
0533 bool evsel__match_bpf_counter_events(const char *name)
0534 {
0535     int name_len;
0536     bool match;
0537     char *ptr;
0538 
0539     if (!evsel__bpf_counter_events)
0540         return false;
0541 
0542     ptr = strstr(evsel__bpf_counter_events, name);
0543     name_len = strlen(name);
0544 
0545     /* check name matches a full token in evsel__bpf_counter_events */
0546     match = (ptr != NULL) &&
0547         ((ptr == evsel__bpf_counter_events) || (*(ptr - 1) == ',')) &&
0548         ((*(ptr + name_len) == ',') || (*(ptr + name_len) == '\0'));
0549 
0550     return match;
0551 }
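/*
 * Worked example: with evsel__bpf_counter_events = "instructions,cycles",
 * evsel__match_bpf_counter_events("cycles") is true because the hit is
 * preceded by ',' and followed by '\0', while "cycle" is false because
 * the character after the strstr() hit is 's', not ',' or '\0'.
 */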
0552 
0553 static const char *__evsel__hw_name(u64 config)
0554 {
0555     if (config < PERF_COUNT_HW_MAX && evsel__hw_names[config])
0556         return evsel__hw_names[config];
0557 
0558     return "unknown-hardware";
0559 }
0560 
0561 static int evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size)
0562 {
0563     int colon = 0, r = 0;
0564     struct perf_event_attr *attr = &evsel->core.attr;
0565     bool exclude_guest_default = false;
0566 
0567 #define MOD_PRINT(context, mod) do {                    \
0568         if (!attr->exclude_##context) {             \
0569             if (!colon) colon = ++r;            \
0570             r += scnprintf(bf + r, size - r, "%c", mod);    \
0571         } } while(0)
0572 
0573     if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv) {
0574         MOD_PRINT(kernel, 'k');
0575         MOD_PRINT(user, 'u');
0576         MOD_PRINT(hv, 'h');
0577         exclude_guest_default = true;
0578     }
0579 
0580     if (attr->precise_ip) {
0581         if (!colon)
0582             colon = ++r;
0583         r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp");
0584         exclude_guest_default = true;
0585     }
0586 
0587     if (attr->exclude_host || attr->exclude_guest == exclude_guest_default) {
0588         MOD_PRINT(host, 'H');
0589         MOD_PRINT(guest, 'G');
0590     }
0591 #undef MOD_PRINT
0592     if (colon)
0593         bf[colon - 1] = ':';
0594     return r;
0595 }
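/*
 * Worked example: for an event with exclude_user = 1 and exclude_hv = 1
 * (kernel-only counting), only the kernel MOD_PRINT() fires and the
 * buffer gains ":k"; adding precise_ip = 2 appends two 'p' characters,
 * giving ":kpp".  Only the contexts that are *not* excluded get a
 * modifier letter.
 */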
0596 
0597 int __weak arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
0598 {
0599     return scnprintf(bf, size, "%s", __evsel__hw_name(evsel->core.attr.config));
0600 }
0601 
0602 static int evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
0603 {
0604     int r = arch_evsel__hw_name(evsel, bf, size);
0605     return r + evsel__add_modifiers(evsel, bf + r, size - r);
0606 }
0607 
0608 const char *const evsel__sw_names[PERF_COUNT_SW_MAX] = {
0609     "cpu-clock",
0610     "task-clock",
0611     "page-faults",
0612     "context-switches",
0613     "cpu-migrations",
0614     "minor-faults",
0615     "major-faults",
0616     "alignment-faults",
0617     "emulation-faults",
0618     "dummy",
0619 };
0620 
0621 static const char *__evsel__sw_name(u64 config)
0622 {
0623     if (config < PERF_COUNT_SW_MAX && evsel__sw_names[config])
0624         return evsel__sw_names[config];
0625     return "unknown-software";
0626 }
0627 
0628 static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size)
0629 {
0630     int r = scnprintf(bf, size, "%s", __evsel__sw_name(evsel->core.attr.config));
0631     return r + evsel__add_modifiers(evsel, bf + r, size - r);
0632 }
0633 
0634 static int evsel__tool_name(enum perf_tool_event ev, char *bf, size_t size)
0635 {
0636     return scnprintf(bf, size, "%s", perf_tool_event__to_str(ev));
0637 }
0638 
0639 static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
0640 {
0641     int r;
0642 
0643     r = scnprintf(bf, size, "mem:0x%" PRIx64 ":", addr);
0644 
0645     if (type & HW_BREAKPOINT_R)
0646         r += scnprintf(bf + r, size - r, "r");
0647 
0648     if (type & HW_BREAKPOINT_W)
0649         r += scnprintf(bf + r, size - r, "w");
0650 
0651     if (type & HW_BREAKPOINT_X)
0652         r += scnprintf(bf + r, size - r, "x");
0653 
0654     return r;
0655 }
0656 
0657 static int evsel__bp_name(struct evsel *evsel, char *bf, size_t size)
0658 {
0659     struct perf_event_attr *attr = &evsel->core.attr;
0660     int r = __evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
0661     return r + evsel__add_modifiers(evsel, bf + r, size - r);
0662 }
0663 
0664 const char *const evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES] = {
0665  { "L1-dcache", "l1-d",     "l1d",      "L1-data",      },
0666  { "L1-icache", "l1-i",     "l1i",      "L1-instruction",   },
0667  { "LLC",   "L2",                           },
0668  { "dTLB",  "d-tlb",    "Data-TLB",             },
0669  { "iTLB",  "i-tlb",    "Instruction-TLB",          },
0670  { "branch",    "branches", "bpu",      "btb",      "bpc",  },
0671  { "node",                              },
0672 };
0673 
0674 const char *const evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES] = {
0675  { "load",  "loads",    "read",                 },
0676  { "store", "stores",   "write",                },
0677  { "prefetch",  "prefetches",   "speculative-read", "speculative-load", },
0678 };
0679 
0680 const char *const evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES] = {
0681  { "refs",  "Reference",    "ops",      "access",       },
0682  { "misses",    "miss",                         },
0683 };
0684 
0685 #define C(x)        PERF_COUNT_HW_CACHE_##x
0686 #define CACHE_READ  (1 << C(OP_READ))
0687 #define CACHE_WRITE (1 << C(OP_WRITE))
0688 #define CACHE_PREFETCH  (1 << C(OP_PREFETCH))
0689 #define COP(x)      (1 << x)
0690 
0691 /*
0692  * cache operation stat
0693  * L1I : Read and prefetch only
0694  * ITLB and BPU : Read-only
0695  */
0696 static const unsigned long evsel__hw_cache_stat[C(MAX)] = {
0697  [C(L1D)]   = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
0698  [C(L1I)]   = (CACHE_READ | CACHE_PREFETCH),
0699  [C(LL)]    = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
0700  [C(DTLB)]  = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
0701  [C(ITLB)]  = (CACHE_READ),
0702  [C(BPU)]   = (CACHE_READ),
0703  [C(NODE)]  = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
0704 };
0705 
0706 bool evsel__is_cache_op_valid(u8 type, u8 op)
0707 {
0708     if (evsel__hw_cache_stat[type] & COP(op))
0709         return true;    /* valid */
0710     else
0711         return false;   /* invalid */
0712 }
0713 
0714 int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size)
0715 {
0716     if (result) {
0717         return scnprintf(bf, size, "%s-%s-%s", evsel__hw_cache[type][0],
0718                  evsel__hw_cache_op[op][0],
0719                  evsel__hw_cache_result[result][0]);
0720     }
0721 
0722     return scnprintf(bf, size, "%s-%s", evsel__hw_cache[type][0],
0723              evsel__hw_cache_op[op][1]);
0724 }
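/*
 * Worked example, using the alias tables above: type = L1D, op = READ,
 * result = MISS composes "L1-dcache-load-misses" from the first alias of
 * each column, while result = ACCESS (0) drops the result part and uses
 * the second alias of the op instead, giving "L1-dcache-loads".
 */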
0725 
0726 static int __evsel__hw_cache_name(u64 config, char *bf, size_t size)
0727 {
0728     u8 op, result, type = (config >>  0) & 0xff;
0729     const char *err = "unknown-ext-hardware-cache-type";
0730 
0731     if (type >= PERF_COUNT_HW_CACHE_MAX)
0732         goto out_err;
0733 
0734     op = (config >>  8) & 0xff;
0735     err = "unknown-ext-hardware-cache-op";
0736     if (op >= PERF_COUNT_HW_CACHE_OP_MAX)
0737         goto out_err;
0738 
0739     result = (config >> 16) & 0xff;
0740     err = "unknown-ext-hardware-cache-result";
0741     if (result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
0742         goto out_err;
0743 
0744     err = "invalid-cache";
0745     if (!evsel__is_cache_op_valid(type, op))
0746         goto out_err;
0747 
0748     return __evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
0749 out_err:
0750     return scnprintf(bf, size, "%s", err);
0751 }
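/*
 * Worked example of the config encoding decoded above (type in bits 0-7,
 * op in bits 8-15, result in bits 16-23), assuming the UAPI enum values:
 *
 *	config = PERF_COUNT_HW_CACHE_DTLB |
 *		 (PERF_COUNT_HW_CACHE_OP_WRITE << 8) |
 *		 (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
 *
 * resolves to "dTLB-store-misses", while an out-of-range field or an
 * invalid combination (e.g. a write op on the read-only ITLB) falls back
 * to one of the "unknown-..."/"invalid-cache" strings.
 */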
0752 
0753 static int evsel__hw_cache_name(struct evsel *evsel, char *bf, size_t size)
0754 {
0755     int ret = __evsel__hw_cache_name(evsel->core.attr.config, bf, size);
0756     return ret + evsel__add_modifiers(evsel, bf + ret, size - ret);
0757 }
0758 
0759 static int evsel__raw_name(struct evsel *evsel, char *bf, size_t size)
0760 {
0761     int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->core.attr.config);
0762     return ret + evsel__add_modifiers(evsel, bf + ret, size - ret);
0763 }
0764 
0765 const char *evsel__name(struct evsel *evsel)
0766 {
0767     char bf[128];
0768 
0769     if (!evsel)
0770         goto out_unknown;
0771 
0772     if (evsel->name)
0773         return evsel->name;
0774 
0775     switch (evsel->core.attr.type) {
0776     case PERF_TYPE_RAW:
0777         evsel__raw_name(evsel, bf, sizeof(bf));
0778         break;
0779 
0780     case PERF_TYPE_HARDWARE:
0781         evsel__hw_name(evsel, bf, sizeof(bf));
0782         break;
0783 
0784     case PERF_TYPE_HW_CACHE:
0785         evsel__hw_cache_name(evsel, bf, sizeof(bf));
0786         break;
0787 
0788     case PERF_TYPE_SOFTWARE:
0789         if (evsel__is_tool(evsel))
0790             evsel__tool_name(evsel->tool_event, bf, sizeof(bf));
0791         else
0792             evsel__sw_name(evsel, bf, sizeof(bf));
0793         break;
0794 
0795     case PERF_TYPE_TRACEPOINT:
0796         scnprintf(bf, sizeof(bf), "%s", "unknown tracepoint");
0797         break;
0798 
0799     case PERF_TYPE_BREAKPOINT:
0800         evsel__bp_name(evsel, bf, sizeof(bf));
0801         break;
0802 
0803     default:
0804         scnprintf(bf, sizeof(bf), "unknown attr type: %d",
0805               evsel->core.attr.type);
0806         break;
0807     }
0808 
0809     evsel->name = strdup(bf);
0810 
0811     if (evsel->name)
0812         return evsel->name;
0813 out_unknown:
0814     return "unknown";
0815 }
0816 
0817 const char *evsel__metric_id(const struct evsel *evsel)
0818 {
0819     if (evsel->metric_id)
0820         return evsel->metric_id;
0821 
0822     if (evsel__is_tool(evsel))
0823         return perf_tool_event__to_str(evsel->tool_event);
0824 
0825     return "unknown";
0826 }
0827 
0828 const char *evsel__group_name(struct evsel *evsel)
0829 {
0830     return evsel->group_name ?: "anon group";
0831 }
0832 
0833 /*
0834  * Returns the group details for the specified leader,
0835  * with following rules.
0836  *
0837  *  For record -e '{cycles,instructions}'
0838  *    'anon group { cycles:u, instructions:u }'
0839  *
0840  *  For record -e 'cycles,instructions' and report --group
0841  *    'cycles:u, instructions:u'
0842  */
0843 int evsel__group_desc(struct evsel *evsel, char *buf, size_t size)
0844 {
0845     int ret = 0;
0846     struct evsel *pos;
0847     const char *group_name = evsel__group_name(evsel);
0848 
0849     if (!evsel->forced_leader)
0850         ret = scnprintf(buf, size, "%s { ", group_name);
0851 
0852     ret += scnprintf(buf + ret, size - ret, "%s", evsel__name(evsel));
0853 
0854     for_each_group_member(pos, evsel)
0855         ret += scnprintf(buf + ret, size - ret, ", %s", evsel__name(pos));
0856 
0857     if (!evsel->forced_leader)
0858         ret += scnprintf(buf + ret, size - ret, " }");
0859 
0860     return ret;
0861 }
0862 
0863 static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
0864                       struct callchain_param *param)
0865 {
0866     bool function = evsel__is_function_event(evsel);
0867     struct perf_event_attr *attr = &evsel->core.attr;
0868 
0869     evsel__set_sample_bit(evsel, CALLCHAIN);
0870 
0871     attr->sample_max_stack = param->max_stack;
0872 
0873     if (opts->kernel_callchains)
0874         attr->exclude_callchain_user = 1;
0875     if (opts->user_callchains)
0876         attr->exclude_callchain_kernel = 1;
0877     if (param->record_mode == CALLCHAIN_LBR) {
0878         if (!opts->branch_stack) {
0879             if (attr->exclude_user) {
0880                 pr_warning("LBR callstack option is only available "
0881                        "to get user callchain information. "
0882                        "Falling back to framepointers.\n");
0883             } else {
0884                 evsel__set_sample_bit(evsel, BRANCH_STACK);
0885                 attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
0886                             PERF_SAMPLE_BRANCH_CALL_STACK |
0887                             PERF_SAMPLE_BRANCH_NO_CYCLES |
0888                             PERF_SAMPLE_BRANCH_NO_FLAGS |
0889                             PERF_SAMPLE_BRANCH_HW_INDEX;
0890             }
0891         } else
0892              pr_warning("Cannot use LBR callstack with branch stack. "
0893                     "Falling back to framepointers.\n");
0894     }
0895 
0896     if (param->record_mode == CALLCHAIN_DWARF) {
0897         if (!function) {
0898             evsel__set_sample_bit(evsel, REGS_USER);
0899             evsel__set_sample_bit(evsel, STACK_USER);
0900             if (opts->sample_user_regs && DWARF_MINIMAL_REGS != PERF_REGS_MASK) {
0901                 attr->sample_regs_user |= DWARF_MINIMAL_REGS;
0902                 pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, "
0903                        "specifying a subset with --user-regs may render DWARF unwinding unreliable, "
0904                        "so the minimal registers set (IP, SP) is explicitly forced.\n");
0905             } else {
0906                 attr->sample_regs_user |= arch__user_reg_mask();
0907             }
0908             attr->sample_stack_user = param->dump_size;
0909             attr->exclude_callchain_user = 1;
0910         } else {
0911             pr_info("Cannot use DWARF unwind for function trace event,"
0912                 " falling back to framepointers.\n");
0913         }
0914     }
0915 
0916     if (function) {
0917         pr_info("Disabling user space callchains for function trace event.\n");
0918         attr->exclude_callchain_user = 1;
0919     }
0920 }
0921 
0922 void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
0923                  struct callchain_param *param)
0924 {
0925     if (param->enabled)
0926         return __evsel__config_callchain(evsel, opts, param);
0927 }
0928 
0929 static void evsel__reset_callgraph(struct evsel *evsel, struct callchain_param *param)
0930 {
0931     struct perf_event_attr *attr = &evsel->core.attr;
0932 
0933     evsel__reset_sample_bit(evsel, CALLCHAIN);
0934     if (param->record_mode == CALLCHAIN_LBR) {
0935         evsel__reset_sample_bit(evsel, BRANCH_STACK);
0936         attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
0937                           PERF_SAMPLE_BRANCH_CALL_STACK |
0938                           PERF_SAMPLE_BRANCH_HW_INDEX);
0939     }
0940     if (param->record_mode == CALLCHAIN_DWARF) {
0941         evsel__reset_sample_bit(evsel, REGS_USER);
0942         evsel__reset_sample_bit(evsel, STACK_USER);
0943     }
0944 }
0945 
0946 static void evsel__apply_config_terms(struct evsel *evsel,
0947                       struct record_opts *opts, bool track)
0948 {
0949     struct evsel_config_term *term;
0950     struct list_head *config_terms = &evsel->config_terms;
0951     struct perf_event_attr *attr = &evsel->core.attr;
0952     /* callgraph default */
0953     struct callchain_param param = {
0954         .record_mode = callchain_param.record_mode,
0955     };
0956     u32 dump_size = 0;
0957     int max_stack = 0;
0958     const char *callgraph_buf = NULL;
0959 
0960     list_for_each_entry(term, config_terms, list) {
0961         switch (term->type) {
0962         case EVSEL__CONFIG_TERM_PERIOD:
0963             if (!(term->weak && opts->user_interval != ULLONG_MAX)) {
0964                 attr->sample_period = term->val.period;
0965                 attr->freq = 0;
0966                 evsel__reset_sample_bit(evsel, PERIOD);
0967             }
0968             break;
0969         case EVSEL__CONFIG_TERM_FREQ:
0970             if (!(term->weak && opts->user_freq != UINT_MAX)) {
0971                 attr->sample_freq = term->val.freq;
0972                 attr->freq = 1;
0973                 evsel__set_sample_bit(evsel, PERIOD);
0974             }
0975             break;
0976         case EVSEL__CONFIG_TERM_TIME:
0977             if (term->val.time)
0978                 evsel__set_sample_bit(evsel, TIME);
0979             else
0980                 evsel__reset_sample_bit(evsel, TIME);
0981             break;
0982         case EVSEL__CONFIG_TERM_CALLGRAPH:
0983             callgraph_buf = term->val.str;
0984             break;
0985         case EVSEL__CONFIG_TERM_BRANCH:
0986             if (term->val.str && strcmp(term->val.str, "no")) {
0987                 evsel__set_sample_bit(evsel, BRANCH_STACK);
0988                 parse_branch_str(term->val.str,
0989                          &attr->branch_sample_type);
0990             } else
0991                 evsel__reset_sample_bit(evsel, BRANCH_STACK);
0992             break;
0993         case EVSEL__CONFIG_TERM_STACK_USER:
0994             dump_size = term->val.stack_user;
0995             break;
0996         case EVSEL__CONFIG_TERM_MAX_STACK:
0997             max_stack = term->val.max_stack;
0998             break;
0999         case EVSEL__CONFIG_TERM_MAX_EVENTS:
1000             evsel->max_events = term->val.max_events;
1001             break;
1002         case EVSEL__CONFIG_TERM_INHERIT:
1003             /*
1004              * attr->inherit should have already been set by
1005              * evsel__config(). If the user explicitly set
1006              * inherit using config terms, override the global
1007              * opts->no_inherit setting.
1008              */
1009             attr->inherit = term->val.inherit ? 1 : 0;
1010             break;
1011         case EVSEL__CONFIG_TERM_OVERWRITE:
1012             attr->write_backward = term->val.overwrite ? 1 : 0;
1013             break;
1014         case EVSEL__CONFIG_TERM_DRV_CFG:
1015             break;
1016         case EVSEL__CONFIG_TERM_PERCORE:
1017             break;
1018         case EVSEL__CONFIG_TERM_AUX_OUTPUT:
1019             attr->aux_output = term->val.aux_output ? 1 : 0;
1020             break;
1021         case EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE:
1022             /* Already applied by auxtrace */
1023             break;
1024         case EVSEL__CONFIG_TERM_CFG_CHG:
1025             break;
1026         default:
1027             break;
1028         }
1029     }
1030 
1031     /* User explicitly set per-event callgraph, clear the old setting and reset. */
1032     if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) {
1033         bool sample_address = false;
1034 
1035         if (max_stack) {
1036             param.max_stack = max_stack;
1037             if (callgraph_buf == NULL)
1038                 callgraph_buf = "fp";
1039         }
1040 
1041         /* parse callgraph parameters */
1042         if (callgraph_buf != NULL) {
1043             if (!strcmp(callgraph_buf, "no")) {
1044                 param.enabled = false;
1045                 param.record_mode = CALLCHAIN_NONE;
1046             } else {
1047                 param.enabled = true;
1048                 if (parse_callchain_record(callgraph_buf, &param)) {
1049                     pr_err("per-event callgraph setting for %s failed. "
1050                            "Apply callgraph global setting for it\n",
1051                            evsel->name);
1052                     return;
1053                 }
1054                 if (param.record_mode == CALLCHAIN_DWARF)
1055                     sample_address = true;
1056             }
1057         }
1058         if (dump_size > 0) {
1059             dump_size = round_up(dump_size, sizeof(u64));
1060             param.dump_size = dump_size;
1061         }
1062 
1063         /* If global callgraph set, clear it */
1064         if (callchain_param.enabled)
1065             evsel__reset_callgraph(evsel, &callchain_param);
1066 
1067         /* set perf-event callgraph */
1068         if (param.enabled) {
1069             if (sample_address) {
1070                 evsel__set_sample_bit(evsel, ADDR);
1071                 evsel__set_sample_bit(evsel, DATA_SRC);
1072                 evsel->core.attr.mmap_data = track;
1073             }
1074             evsel__config_callchain(evsel, opts, &param);
1075         }
1076     }
1077 }
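/*
 * Example: a sketch of where these terms come from.  An event spec such
 * as the following (command line shown purely as an illustration):
 *
 *	perf record -e 'cycles/period=100000,call-graph=dwarf,stack-size=8192/'
 *
 * is parsed into EVSEL__CONFIG_TERM_PERIOD, _CALLGRAPH and _STACK_USER
 * entries on evsel->config_terms, which the loop above then applies per
 * event, overriding the matching global record_opts/callchain settings.
 */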
1078 
1079 struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type)
1080 {
1081     struct evsel_config_term *term, *found_term = NULL;
1082 
1083     list_for_each_entry(term, &evsel->config_terms, list) {
1084         if (term->type == type)
1085             found_term = term;
1086     }
1087 
1088     return found_term;
1089 }
1090 
1091 void __weak arch_evsel__set_sample_weight(struct evsel *evsel)
1092 {
1093     evsel__set_sample_bit(evsel, WEIGHT);
1094 }
1095 
1096 void __weak arch_evsel__fixup_new_cycles(struct perf_event_attr *attr __maybe_unused)
1097 {
1098 }
1099 
1100 void __weak arch__post_evsel_config(struct evsel *evsel __maybe_unused,
1101                     struct perf_event_attr *attr __maybe_unused)
1102 {
1103 }
1104 
1105 static void evsel__set_default_freq_period(struct record_opts *opts,
1106                        struct perf_event_attr *attr)
1107 {
1108     if (opts->freq) {
1109         attr->freq = 1;
1110         attr->sample_freq = opts->freq;
1111     } else {
1112         attr->sample_period = opts->default_interval;
1113     }
1114 }
1115 
1116 static bool evsel__is_offcpu_event(struct evsel *evsel)
1117 {
1118     return evsel__is_bpf_output(evsel) && !strcmp(evsel->name, OFFCPU_EVENT);
1119 }
1120 
1121 /*
1122  * The enable_on_exec/disabled value strategy:
1123  *
1124  *  1) For any type of traced program:
1125  *    - all independent events and group leaders are disabled
1126  *    - all group members are enabled
1127  *
1128  *     Group members are ruled by group leaders. They need to
1129  *     be enabled, because the group scheduling relies on that.
1130  *
1131  *  2) For traced programs executed by perf:
1132  *     - all independent events and group leaders have
1133  *       enable_on_exec set
1134  *     - we don't specifically enable or disable any event during
1135  *       the record command
1136  *
1137  *     Independent events and group leaders are initially disabled
1138  *     and get enabled by exec. Group members are ruled by group
1139  *     leaders as stated in 1).
1140  *
1141  *  3) For traced programs attached by perf (pid/tid):
1142  *     - we specifically enable or disable all events during
1143  *       the record command
1144  *
1145  *     When attaching events to an already running traced process we
1146  *     enable/disable events specifically, as there's no
1147  *     initial traced exec call.
1148  */
1149 void evsel__config(struct evsel *evsel, struct record_opts *opts,
1150            struct callchain_param *callchain)
1151 {
1152     struct evsel *leader = evsel__leader(evsel);
1153     struct perf_event_attr *attr = &evsel->core.attr;
1154     int track = evsel->tracking;
1155     bool per_cpu = opts->target.default_per_cpu && !opts->target.per_thread;
1156 
1157     attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1;
1158     attr->inherit       = !opts->no_inherit;
1159     attr->write_backward = opts->overwrite ? 1 : 0;
1160 
1161     evsel__set_sample_bit(evsel, IP);
1162     evsel__set_sample_bit(evsel, TID);
1163 
1164     if (evsel->sample_read) {
1165         evsel__set_sample_bit(evsel, READ);
1166 
1167         /*
1168          * We need ID even in the case of a single event, because
1169          * PERF_SAMPLE_READ processes ID specific data.
1170          */
1171         evsel__set_sample_id(evsel, false);
1172 
1173         /*
1174          * Apply the group read format only if we belong to a group
1175          * with more than one member.
1176          */
1177         if (leader->core.nr_members > 1) {
1178             attr->read_format |= PERF_FORMAT_GROUP;
1179             attr->inherit = 0;
1180         }
1181     }
1182 
1183     /*
1184      * We default some events to have a default interval, but keep
1185      * it a weak assumption that the user can override.
1186      */
1187     if ((evsel->is_libpfm_event && !attr->sample_period) ||
1188         (!evsel->is_libpfm_event && (!attr->sample_period ||
1189                      opts->user_freq != UINT_MAX ||
1190                      opts->user_interval != ULLONG_MAX)))
1191         evsel__set_default_freq_period(opts, attr);
1192 
1193     /*
1194      * If attr->freq was set (here or earlier), ask for period
1195      * to be sampled.
1196      */
1197     if (attr->freq)
1198         evsel__set_sample_bit(evsel, PERIOD);
1199 
1200     if (opts->no_samples)
1201         attr->sample_freq = 0;
1202 
1203     if (opts->inherit_stat) {
1204         evsel->core.attr.read_format |=
1205             PERF_FORMAT_TOTAL_TIME_ENABLED |
1206             PERF_FORMAT_TOTAL_TIME_RUNNING |
1207             PERF_FORMAT_ID;
1208         attr->inherit_stat = 1;
1209     }
1210 
1211     if (opts->sample_address) {
1212         evsel__set_sample_bit(evsel, ADDR);
1213         attr->mmap_data = track;
1214     }
1215 
1216     /*
1217      * We don't allow user space callchains for the function trace
1218      * event, due to issues with page faults while tracing the page
1219      * fault handler and its overall trickiness.
1220      */
1221     if (evsel__is_function_event(evsel))
1222         evsel->core.attr.exclude_callchain_user = 1;
1223 
1224     if (callchain && callchain->enabled && !evsel->no_aux_samples)
1225         evsel__config_callchain(evsel, opts, callchain);
1226 
1227     if (opts->sample_intr_regs && !evsel->no_aux_samples &&
1228         !evsel__is_dummy_event(evsel)) {
1229         attr->sample_regs_intr = opts->sample_intr_regs;
1230         evsel__set_sample_bit(evsel, REGS_INTR);
1231     }
1232 
1233     if (opts->sample_user_regs && !evsel->no_aux_samples &&
1234         !evsel__is_dummy_event(evsel)) {
1235         attr->sample_regs_user |= opts->sample_user_regs;
1236         evsel__set_sample_bit(evsel, REGS_USER);
1237     }
1238 
1239     if (target__has_cpu(&opts->target) || opts->sample_cpu)
1240         evsel__set_sample_bit(evsel, CPU);
1241 
1242     /*
1243      * When the user has explicitly disabled time, don't force it here.
1244      */
1245     if (opts->sample_time &&
1246         (!perf_missing_features.sample_id_all &&
1247         (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu ||
1248          opts->sample_time_set)))
1249         evsel__set_sample_bit(evsel, TIME);
1250 
1251     if (opts->raw_samples && !evsel->no_aux_samples) {
1252         evsel__set_sample_bit(evsel, TIME);
1253         evsel__set_sample_bit(evsel, RAW);
1254         evsel__set_sample_bit(evsel, CPU);
1255     }
1256 
1257     if (opts->sample_address)
1258         evsel__set_sample_bit(evsel, DATA_SRC);
1259 
1260     if (opts->sample_phys_addr)
1261         evsel__set_sample_bit(evsel, PHYS_ADDR);
1262 
1263     if (opts->no_buffering) {
1264         attr->watermark = 0;
1265         attr->wakeup_events = 1;
1266     }
1267     if (opts->branch_stack && !evsel->no_aux_samples) {
1268         evsel__set_sample_bit(evsel, BRANCH_STACK);
1269         attr->branch_sample_type = opts->branch_stack;
1270     }
1271 
1272     if (opts->sample_weight)
1273         arch_evsel__set_sample_weight(evsel);
1274 
1275     attr->task     = track;
1276     attr->mmap     = track;
1277     attr->mmap2    = track && !perf_missing_features.mmap2;
1278     attr->comm     = track;
1279     attr->build_id = track && opts->build_id;
1280 
1281     /*
1282      * ksymbol is tracked separately with text poke because it needs to be
1283      * system wide and enabled immediately.
1284      */
1285     if (!opts->text_poke)
1286         attr->ksymbol = track && !perf_missing_features.ksymbol;
1287     attr->bpf_event = track && !opts->no_bpf_event && !perf_missing_features.bpf;
1288 
1289     if (opts->record_namespaces)
1290         attr->namespaces  = track;
1291 
1292     if (opts->record_cgroup) {
1293         attr->cgroup = track && !perf_missing_features.cgroup;
1294         evsel__set_sample_bit(evsel, CGROUP);
1295     }
1296 
1297     if (opts->sample_data_page_size)
1298         evsel__set_sample_bit(evsel, DATA_PAGE_SIZE);
1299 
1300     if (opts->sample_code_page_size)
1301         evsel__set_sample_bit(evsel, CODE_PAGE_SIZE);
1302 
1303     if (opts->record_switch_events)
1304         attr->context_switch = track;
1305 
1306     if (opts->sample_transaction)
1307         evsel__set_sample_bit(evsel, TRANSACTION);
1308 
1309     if (opts->running_time) {
1310         evsel->core.attr.read_format |=
1311             PERF_FORMAT_TOTAL_TIME_ENABLED |
1312             PERF_FORMAT_TOTAL_TIME_RUNNING;
1313     }
1314 
1315     /*
1316      * XXX see the function comment above
1317      *
1318      * Disabling only independent events or group leaders,
1319      * keeping group members enabled.
1320      */
1321     if (evsel__is_group_leader(evsel))
1322         attr->disabled = 1;
1323 
1324     /*
1325      * Setting enable_on_exec for independent events and
1326      * group leaders for traced programs executed by perf.
1327      */
1328     if (target__none(&opts->target) && evsel__is_group_leader(evsel) &&
1329         !opts->initial_delay)
1330         attr->enable_on_exec = 1;
1331 
1332     if (evsel->immediate) {
1333         attr->disabled = 0;
1334         attr->enable_on_exec = 0;
1335     }
1336 
1337     clockid = opts->clockid;
1338     if (opts->use_clockid) {
1339         attr->use_clockid = 1;
1340         attr->clockid = opts->clockid;
1341     }
1342 
1343     if (evsel->precise_max)
1344         attr->precise_ip = 3;
1345 
1346     if (opts->all_user) {
1347         attr->exclude_kernel = 1;
1348         attr->exclude_user   = 0;
1349     }
1350 
1351     if (opts->all_kernel) {
1352         attr->exclude_kernel = 0;
1353         attr->exclude_user   = 1;
1354     }
1355 
1356     if (evsel->core.own_cpus || evsel->unit)
1357         evsel->core.attr.read_format |= PERF_FORMAT_ID;
1358 
1359     /*
1360      * Apply event-specific term settings;
1361      * they override any global configuration.
1362      */
1363     evsel__apply_config_terms(evsel, opts, track);
1364 
1365     evsel->ignore_missing_thread = opts->ignore_missing_thread;
1366 
1367     /* The --period option takes the precedence. */
1368     if (opts->period_set) {
1369         if (opts->period)
1370             evsel__set_sample_bit(evsel, PERIOD);
1371         else
1372             evsel__reset_sample_bit(evsel, PERIOD);
1373     }
1374 
1375     /*
1376      * A dummy event never triggers any actual counter and therefore
1377      * cannot be used with branch_stack.
1378      *
1379      * For initial_delay, a dummy event is added implicitly.
1380      * The software event will trigger -EOPNOTSUPP error out,
1381      * if BRANCH_STACK bit is set.
1382      */
1383     if (evsel__is_dummy_event(evsel))
1384         evsel__reset_sample_bit(evsel, BRANCH_STACK);
1385 
1386     if (evsel__is_offcpu_event(evsel))
1387         evsel->core.attr.sample_type &= OFFCPU_SAMPLE_TYPES;
1388 
1389     arch__post_evsel_config(evsel, attr);
1390 }
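/*
 * Example: a minimal sketch of the usual calling sequence around
 * evsel__config() on the record path (illustrative, not a verbatim
 * excerpt; evlist__for_each_entry() and evlist__open() come from
 * evlist.h/evlist.c):
 *
 *	evlist__for_each_entry(evlist, evsel)
 *		evsel__config(evsel, opts, &callchain_param);
 *
 *	// only after every attr is configured are the events opened
 *	err = evlist__open(evlist);
 */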
1391 
1392 int evsel__set_filter(struct evsel *evsel, const char *filter)
1393 {
1394     char *new_filter = strdup(filter);
1395 
1396     if (new_filter != NULL) {
1397         free(evsel->filter);
1398         evsel->filter = new_filter;
1399         return 0;
1400     }
1401 
1402     return -1;
1403 }
1404 
1405 static int evsel__append_filter(struct evsel *evsel, const char *fmt, const char *filter)
1406 {
1407     char *new_filter;
1408 
1409     if (evsel->filter == NULL)
1410         return evsel__set_filter(evsel, filter);
1411 
1412     if (asprintf(&new_filter, fmt, evsel->filter, filter) > 0) {
1413         free(evsel->filter);
1414         evsel->filter = new_filter;
1415         return 0;
1416     }
1417 
1418     return -1;
1419 }
1420 
1421 int evsel__append_tp_filter(struct evsel *evsel, const char *filter)
1422 {
1423     return evsel__append_filter(evsel, "(%s) && (%s)", filter);
1424 }
1425 
1426 int evsel__append_addr_filter(struct evsel *evsel, const char *filter)
1427 {
1428     return evsel__append_filter(evsel, "%s,%s", filter);
1429 }
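/*
 * Worked example for the two append helpers above: starting from an
 * existing tracepoint filter of "pid == 1",
 * evsel__append_tp_filter(evsel, "prev_state == 0") leaves evsel->filter
 * as "(pid == 1) && (prev_state == 0)".  The address-filter variant just
 * joins the strings with a comma, which is how multiple address filters
 * (e.g. for Intel PT) are accumulated.
 */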
1430 
1431 /* Caller has to clear disabled after going through all CPUs. */
1432 int evsel__enable_cpu(struct evsel *evsel, int cpu_map_idx)
1433 {
1434     return perf_evsel__enable_cpu(&evsel->core, cpu_map_idx);
1435 }
1436 
1437 int evsel__enable(struct evsel *evsel)
1438 {
1439     int err = perf_evsel__enable(&evsel->core);
1440 
1441     if (!err)
1442         evsel->disabled = false;
1443     return err;
1444 }
1445 
1446 /* Caller has to set disabled after going through all CPUs. */
1447 int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx)
1448 {
1449     return perf_evsel__disable_cpu(&evsel->core, cpu_map_idx);
1450 }
1451 
1452 int evsel__disable(struct evsel *evsel)
1453 {
1454     int err = perf_evsel__disable(&evsel->core);
1455     /*
1456      * We mark it disabled here so that tools that disable an event can
1457      * ignore events after they disable it, i.e. the ring buffer may
1458      * already have a few more events queued up before the kernel got the
1459      * stop request.
1460      */
1461     if (!err)
1462         evsel->disabled = true;
1463 
1464     return err;
1465 }
1466 
1467 void free_config_terms(struct list_head *config_terms)
1468 {
1469     struct evsel_config_term *term, *h;
1470 
1471     list_for_each_entry_safe(term, h, config_terms, list) {
1472         list_del_init(&term->list);
1473         if (term->free_str)
1474             zfree(&term->val.str);
1475         free(term);
1476     }
1477 }
1478 
1479 static void evsel__free_config_terms(struct evsel *evsel)
1480 {
1481     free_config_terms(&evsel->config_terms);
1482 }
1483 
1484 void evsel__exit(struct evsel *evsel)
1485 {
1486     assert(list_empty(&evsel->core.node));
1487     assert(evsel->evlist == NULL);
1488     bpf_counter__destroy(evsel);
1489     evsel__free_counts(evsel);
1490     perf_evsel__free_fd(&evsel->core);
1491     perf_evsel__free_id(&evsel->core);
1492     evsel__free_config_terms(evsel);
1493     cgroup__put(evsel->cgrp);
1494     perf_cpu_map__put(evsel->core.cpus);
1495     perf_cpu_map__put(evsel->core.own_cpus);
1496     perf_thread_map__put(evsel->core.threads);
1497     zfree(&evsel->group_name);
1498     zfree(&evsel->name);
1499     zfree(&evsel->pmu_name);
1500     zfree(&evsel->unit);
1501     zfree(&evsel->metric_id);
1502     evsel__zero_per_pkg(evsel);
1503     hashmap__free(evsel->per_pkg_mask);
1504     evsel->per_pkg_mask = NULL;
1505     zfree(&evsel->metric_events);
1506     perf_evsel__object.fini(evsel);
1507 }
1508 
1509 void evsel__delete(struct evsel *evsel)
1510 {
1511     evsel__exit(evsel);
1512     free(evsel);
1513 }
1514 
1515 void evsel__compute_deltas(struct evsel *evsel, int cpu_map_idx, int thread,
1516                struct perf_counts_values *count)
1517 {
1518     struct perf_counts_values tmp;
1519 
1520     if (!evsel->prev_raw_counts)
1521         return;
1522 
1523     if (cpu_map_idx == -1) {
1524         tmp = evsel->prev_raw_counts->aggr;
1525         evsel->prev_raw_counts->aggr = *count;
1526     } else {
1527         tmp = *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
1528         *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread) = *count;
1529     }
1530 
1531     count->val = count->val - tmp.val;
1532     count->ena = count->ena - tmp.ena;
1533     count->run = count->run - tmp.run;
1534 }
1535 
1536 static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread)
1537 {
1538     struct perf_counts_values *count = perf_counts(evsel->counts, cpu_map_idx, thread);
1539 
1540     return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count);
1541 }
1542 
1543 static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
1544                  u64 val, u64 ena, u64 run, u64 lost)
1545 {
1546     struct perf_counts_values *count;
1547 
1548     count = perf_counts(counter->counts, cpu_map_idx, thread);
1549 
1550     count->val    = val;
1551     count->ena    = ena;
1552     count->run    = run;
1553     count->lost   = lost;
1554 
1555     perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
1556 }
1557 
1558 static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int thread, u64 *data)
1559 {
1560     u64 read_format = leader->core.attr.read_format;
1561     struct sample_read_value *v;
1562     u64 nr, ena = 0, run = 0, lost = 0;
1563 
1564     nr = *data++;
1565 
1566     if (nr != (u64) leader->core.nr_members)
1567         return -EINVAL;
1568 
1569     if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1570         ena = *data++;
1571 
1572     if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1573         run = *data++;
1574 
1575     v = (void *)data;
1576     sample_read_group__for_each(v, nr, read_format) {
1577         struct evsel *counter;
1578 
1579         counter = evlist__id2evsel(leader->evlist, v->id);
1580         if (!counter)
1581             return -EINVAL;
1582 
1583         if (read_format & PERF_FORMAT_LOST)
1584             lost = v->lost;
1585 
1586         evsel__set_count(counter, cpu_map_idx, thread, v->value, ena, run, lost);
1587     }
1588 
1589     return 0;
1590 }
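/*
 * Example of the buffer this walks, for a two-member group with
 * read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID |
 * PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING
 * (layout as documented in perf_event_open(2), one u64 per word):
 *
 *	{ nr = 2, time_enabled, time_running,
 *	  { value0, id0 }, { value1, id1 } }
 *
 * Each id is mapped back to its evsel via evlist__id2evsel() and the
 * value/ena/run (and, with PERF_FORMAT_LOST, lost) numbers are stored
 * with evsel__set_count().
 */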
1591 
1592 static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread)
1593 {
1594     struct perf_stat_evsel *ps = leader->stats;
1595     u64 read_format = leader->core.attr.read_format;
1596     int size = perf_evsel__read_size(&leader->core);
1597     u64 *data = ps->group_data;
1598 
1599     if (!(read_format & PERF_FORMAT_ID))
1600         return -EINVAL;
1601 
1602     if (!evsel__is_group_leader(leader))
1603         return -EINVAL;
1604 
1605     if (!data) {
1606         data = zalloc(size);
1607         if (!data)
1608             return -ENOMEM;
1609 
1610         ps->group_data = data;
1611     }
1612 
1613     if (FD(leader, cpu_map_idx, thread) < 0)
1614         return -EINVAL;
1615 
1616     if (readn(FD(leader, cpu_map_idx, thread), data, size) <= 0)
1617         return -errno;
1618 
1619     return evsel__process_group_data(leader, cpu_map_idx, thread, data);
1620 }
1621 
1622 int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread)
1623 {
1624     u64 read_format = evsel->core.attr.read_format;
1625 
1626     if (read_format & PERF_FORMAT_GROUP)
1627         return evsel__read_group(evsel, cpu_map_idx, thread);
1628 
1629     return evsel__read_one(evsel, cpu_map_idx, thread);
1630 }
1631 
1632 int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale)
1633 {
1634     struct perf_counts_values count;
1635     size_t nv = scale ? 3 : 1;
1636 
1637     if (FD(evsel, cpu_map_idx, thread) < 0)
1638         return -EINVAL;
1639 
1640     if (evsel->counts == NULL && evsel__alloc_counts(evsel) < 0)
1641         return -ENOMEM;
1642 
1643     if (readn(FD(evsel, cpu_map_idx, thread), &count, nv * sizeof(u64)) <= 0)
1644         return -errno;
1645 
1646     evsel__compute_deltas(evsel, cpu_map_idx, thread, &count);
1647     perf_counts_values__scale(&count, scale, NULL);
1648     *perf_counts(evsel->counts, cpu_map_idx, thread) = count;
1649     return 0;
1650 }
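/*
 * Example: the nv == 3 case above corresponds to a non-group read with
 * read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
 * PERF_FORMAT_TOTAL_TIME_RUNNING (and no PERF_FORMAT_ID/GROUP), i.e. the
 * kernel returns { value, time_enabled, time_running } as three u64s.
 * They land in count.val/ena/run, and perf_counts_values__scale() then
 * extrapolates the value when the event was multiplexed (run < ena).
 */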
1651 
1652 static int evsel__match_other_cpu(struct evsel *evsel, struct evsel *other,
1653                   int cpu_map_idx)
1654 {
1655     struct perf_cpu cpu;
1656 
1657     cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx);
1658     return perf_cpu_map__idx(other->core.cpus, cpu);
1659 }
1660 
1661 static int evsel__hybrid_group_cpu_map_idx(struct evsel *evsel, int cpu_map_idx)
1662 {
1663     struct evsel *leader = evsel__leader(evsel);
1664 
1665     if ((evsel__is_hybrid(evsel) && !evsel__is_hybrid(leader)) ||
1666         (!evsel__is_hybrid(evsel) && evsel__is_hybrid(leader))) {
1667         return evsel__match_other_cpu(evsel, leader, cpu_map_idx);
1668     }
1669 
1670     return cpu_map_idx;
1671 }
1672 
1673 static int get_group_fd(struct evsel *evsel, int cpu_map_idx, int thread)
1674 {
1675     struct evsel *leader = evsel__leader(evsel);
1676     int fd;
1677 
1678     if (evsel__is_group_leader(evsel))
1679         return -1;
1680 
1681     /*
1682      * The leader must already be processed/open;
1683      * if not, it's a bug.
1684      */
1685     BUG_ON(!leader->core.fd);
1686 
1687     cpu_map_idx = evsel__hybrid_group_cpu_map_idx(evsel, cpu_map_idx);
1688     if (cpu_map_idx == -1)
1689         return -1;
1690 
1691     fd = FD(leader, cpu_map_idx, thread);
1692     BUG_ON(fd == -1);
1693 
1694     return fd;
1695 }
1696 
1697 static void evsel__remove_fd(struct evsel *pos, int nr_cpus, int nr_threads, int thread_idx)
1698 {
1699     for (int cpu = 0; cpu < nr_cpus; cpu++)
1700         for (int thread = thread_idx; thread < nr_threads - 1; thread++)
1701             FD(pos, cpu, thread) = FD(pos, cpu, thread + 1);
1702 }
1703 
1704 static int update_fds(struct evsel *evsel,
1705               int nr_cpus, int cpu_map_idx,
1706               int nr_threads, int thread_idx)
1707 {
1708     struct evsel *pos;
1709 
1710     if (cpu_map_idx >= nr_cpus || thread_idx >= nr_threads)
1711         return -EINVAL;
1712 
1713     evlist__for_each_entry(evsel->evlist, pos) {
1714         nr_cpus = pos != evsel ? nr_cpus : cpu_map_idx;
1715 
1716         evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx);
1717 
1718         /*
1719          * Since the fds for the next evsel have not been created,
1720          * there is no need to iterate over the whole event list.
1721          */
1722         if (pos == evsel)
1723             break;
1724     }
1725     return 0;
1726 }
1727 
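     /*
      * If the open failed with -ESRCH because the target thread exited,
      * optionally drop that thread from the thread map (shifting the fds
      * that were already opened) so that the caller can retry without it.
      */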
1728 static bool evsel__ignore_missing_thread(struct evsel *evsel,
1729                      int nr_cpus, int cpu_map_idx,
1730                      struct perf_thread_map *threads,
1731                      int thread, int err)
1732 {
1733     pid_t ignore_pid = perf_thread_map__pid(threads, thread);
1734 
1735     if (!evsel->ignore_missing_thread)
1736         return false;
1737 
1738     /* The system wide setup does not work with threads. */
1739     if (evsel->core.system_wide)
1740         return false;
1741 
1742     /* -ESRCH is the perf event syscall errno for pids that are not found. */
1743     if (err != -ESRCH)
1744         return false;
1745 
1746     /* If there's only one thread, let it fail. */
1747     if (threads->nr == 1)
1748         return false;
1749 
1750     /*
1751      * We should remove the fd for the missing thread first,
1752      * because thread_map__remove() will decrease threads->nr.
1753      */
1754     if (update_fds(evsel, nr_cpus, cpu_map_idx, threads->nr, thread))
1755         return false;
1756 
1757     if (thread_map__remove(threads, thread))
1758         return false;
1759 
1760     pr_warning("WARNING: Ignored open failure for pid %d\n",
1761            ignore_pid);
1762     return true;
1763 }
1764 
1765 static int __open_attr__fprintf(FILE *fp, const char *name, const char *val,
1766                 void *priv __maybe_unused)
1767 {
1768     return fprintf(fp, "  %-32s %s\n", name, val);
1769 }
1770 
1771 static void display_attr(struct perf_event_attr *attr)
1772 {
1773     if (verbose >= 2 || debug_peo_args) {
1774         fprintf(stderr, "%.60s\n", graph_dotted_line);
1775         fprintf(stderr, "perf_event_attr:\n");
1776         perf_event_attr__fprintf(stderr, attr, __open_attr__fprintf, NULL);
1777         fprintf(stderr, "%.60s\n", graph_dotted_line);
1778     }
1779 }
1780 
1781 bool evsel__precise_ip_fallback(struct evsel *evsel)
1782 {
1783     /* Do not try less precise if not requested. */
1784     if (!evsel->precise_max)
1785         return false;
1786 
1787     /*
1788      * We tried all the precise_ip values, and it's
1789      * still failing, so leave it to standard fallback.
1790      */
1791     if (!evsel->core.attr.precise_ip) {
1792         evsel->core.attr.precise_ip = evsel->precise_ip_original;
1793         return false;
1794     }
1795 
1796     if (!evsel->precise_ip_original)
1797         evsel->precise_ip_original = evsel->core.attr.precise_ip;
1798 
1799     evsel->core.attr.precise_ip--;
1800     pr_debug2_peo("decreasing precise_ip by one (%d)\n", evsel->core.attr.precise_ip);
1801     display_attr(&evsel->core.attr);
1802     return true;
1803 }
1804 
1805 static struct perf_cpu_map *empty_cpu_map;
1806 static struct perf_thread_map *empty_thread_map;
1807 
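     /*
      * Common setup before opening: bail out early for features known to be
      * missing, fall back to dummy cpu/thread maps when none are given,
      * allocate the per-cpu/per-thread fd array and set the base open flags.
      */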
1808 static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
1809         struct perf_thread_map *threads)
1810 {
1811     int nthreads;
1812 
1813     if ((perf_missing_features.write_backward && evsel->core.attr.write_backward) ||
1814         (perf_missing_features.aux_output     && evsel->core.attr.aux_output))
1815         return -EINVAL;
1816 
1817     if (cpus == NULL) {
1818         if (empty_cpu_map == NULL) {
1819             empty_cpu_map = perf_cpu_map__dummy_new();
1820             if (empty_cpu_map == NULL)
1821                 return -ENOMEM;
1822         }
1823 
1824         cpus = empty_cpu_map;
1825     }
1826 
1827     if (threads == NULL) {
1828         if (empty_thread_map == NULL) {
1829             empty_thread_map = thread_map__new_by_tid(-1);
1830             if (empty_thread_map == NULL)
1831                 return -ENOMEM;
1832         }
1833 
1834         threads = empty_thread_map;
1835     }
1836 
1837     if (evsel->core.system_wide)
1838         nthreads = 1;
1839     else
1840         nthreads = threads->nr;
1841 
1842     if (evsel->core.fd == NULL &&
1843         perf_evsel__alloc_fd(&evsel->core, perf_cpu_map__nr(cpus), nthreads) < 0)
1844         return -ENOMEM;
1845 
1846     evsel->open_flags = PERF_FLAG_FD_CLOEXEC;
1847     if (evsel->cgrp)
1848         evsel->open_flags |= PERF_FLAG_PID_CGROUP;
1849 
1850     return 0;
1851 }
1852 
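     /*
      * Clear attribute bits for features that probing has shown the running
      * kernel (or PMU) does not support, so that a retried open can succeed.
      */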
1853 static void evsel__disable_missing_features(struct evsel *evsel)
1854 {
1855     if (perf_missing_features.weight_struct) {
1856         evsel__set_sample_bit(evsel, WEIGHT);
1857         evsel__reset_sample_bit(evsel, WEIGHT_STRUCT);
1858     }
1859     if (perf_missing_features.clockid_wrong)
1860         evsel->core.attr.clockid = CLOCK_MONOTONIC; /* should always work */
1861     if (perf_missing_features.clockid) {
1862         evsel->core.attr.use_clockid = 0;
1863         evsel->core.attr.clockid = 0;
1864     }
1865     if (perf_missing_features.cloexec)
1866         evsel->open_flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
1867     if (perf_missing_features.mmap2)
1868         evsel->core.attr.mmap2 = 0;
1869     if (evsel->pmu && evsel->pmu->missing_features.exclude_guest)
1870         evsel->core.attr.exclude_guest = evsel->core.attr.exclude_host = 0;
1871     if (perf_missing_features.lbr_flags)
1872         evsel->core.attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
1873                      PERF_SAMPLE_BRANCH_NO_CYCLES);
1874     if (perf_missing_features.group_read && evsel->core.attr.inherit)
1875         evsel->core.attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID);
1876     if (perf_missing_features.ksymbol)
1877         evsel->core.attr.ksymbol = 0;
1878     if (perf_missing_features.bpf)
1879         evsel->core.attr.bpf_event = 0;
1880     if (perf_missing_features.branch_hw_idx)
1881         evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_HW_INDEX;
1882     if (perf_missing_features.sample_id_all)
1883         evsel->core.attr.sample_id_all = 0;
1884 }
1885 
1886 int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
1887             struct perf_thread_map *threads)
1888 {
1889     int err;
1890 
1891     err = __evsel__prepare_open(evsel, cpus, threads);
1892     if (err)
1893         return err;
1894 
1895     evsel__disable_missing_features(evsel);
1896 
1897     return err;
1898 }
1899 
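     /*
      * Called after a failed open: mark the newest requested feature as
      * missing. Returns true when a feature was switched off and the open
      * should be retried, false when the failure cannot be worked around.
      */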
1900 bool evsel__detect_missing_features(struct evsel *evsel)
1901 {
1902     /*
1903      * Must probe features in the order they were added to the
1904      * perf_event_attr interface.
1905      */
1906     if (!perf_missing_features.weight_struct &&
1907         (evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) {
1908         perf_missing_features.weight_struct = true;
1909         pr_debug2("switching off weight struct support\n");
1910         return true;
1911     } else if (!perf_missing_features.code_page_size &&
1912         (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)) {
1913         perf_missing_features.code_page_size = true;
1914         pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support, bailing out\n");
1915         return false;
1916     } else if (!perf_missing_features.data_page_size &&
1917         (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)) {
1918         perf_missing_features.data_page_size = true;
1919         pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support, bailing out\n");
1920         return false;
1921     } else if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) {
1922         perf_missing_features.cgroup = true;
1923         pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n");
1924         return false;
1925     } else if (!perf_missing_features.branch_hw_idx &&
1926         (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) {
1927         perf_missing_features.branch_hw_idx = true;
1928         pr_debug2("switching off branch HW index support\n");
1929         return true;
1930     } else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) {
1931         perf_missing_features.aux_output = true;
1932         pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n");
1933         return false;
1934     } else if (!perf_missing_features.bpf && evsel->core.attr.bpf_event) {
1935         perf_missing_features.bpf = true;
1936         pr_debug2_peo("switching off bpf_event\n");
1937         return true;
1938     } else if (!perf_missing_features.ksymbol && evsel->core.attr.ksymbol) {
1939         perf_missing_features.ksymbol = true;
1940         pr_debug2_peo("switching off ksymbol\n");
1941         return true;
1942     } else if (!perf_missing_features.write_backward && evsel->core.attr.write_backward) {
1943         perf_missing_features.write_backward = true;
1944         pr_debug2_peo("switching off write_backward\n");
1945         return false;
1946     } else if (!perf_missing_features.clockid_wrong && evsel->core.attr.use_clockid) {
1947         perf_missing_features.clockid_wrong = true;
1948         pr_debug2_peo("switching off clockid\n");
1949         return true;
1950     } else if (!perf_missing_features.clockid && evsel->core.attr.use_clockid) {
1951         perf_missing_features.clockid = true;
1952         pr_debug2_peo("switching off use_clockid\n");
1953         return true;
1954     } else if (!perf_missing_features.cloexec && (evsel->open_flags & PERF_FLAG_FD_CLOEXEC)) {
1955         perf_missing_features.cloexec = true;
1956         pr_debug2_peo("switching off cloexec flag\n");
1957         return true;
1958     } else if (!perf_missing_features.mmap2 && evsel->core.attr.mmap2) {
1959         perf_missing_features.mmap2 = true;
1960         pr_debug2_peo("switching off mmap2\n");
1961         return true;
1962     } else if ((evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) &&
1963            (evsel->pmu == NULL || evsel->pmu->missing_features.exclude_guest)) {
1964         if (evsel->pmu == NULL) {
1965             evsel->pmu = evsel__find_pmu(evsel);
1966             if (evsel->pmu)
1967                 evsel->pmu->missing_features.exclude_guest = true;
1968             else {
1969                 /* we cannot find PMU, disable attrs now */
1970                 evsel->core.attr.exclude_host = false;
1971                 evsel->core.attr.exclude_guest = false;
1972             }
1973         }
1974 
1975         if (evsel->exclude_GH) {
1976             pr_debug2_peo("PMU has no exclude_host/guest support, bailing out\n");
1977             return false;
1978         }
1979         if (!perf_missing_features.exclude_guest) {
1980             perf_missing_features.exclude_guest = true;
1981             pr_debug2_peo("switching off exclude_guest, exclude_host\n");
1982         }
1983         return true;
1984     } else if (!perf_missing_features.sample_id_all) {
1985         perf_missing_features.sample_id_all = true;
1986         pr_debug2_peo("switching off sample_id_all\n");
1987         return true;
1988     } else if (!perf_missing_features.lbr_flags &&
1989             (evsel->core.attr.branch_sample_type &
1990              (PERF_SAMPLE_BRANCH_NO_CYCLES |
1991               PERF_SAMPLE_BRANCH_NO_FLAGS))) {
1992         perf_missing_features.lbr_flags = true;
1993         pr_debug2_peo("switching off branch sample type no (cycles/flags)\n");
1994         return true;
1995     } else if (!perf_missing_features.group_read &&
1996             evsel->core.attr.inherit &&
1997            (evsel->core.attr.read_format & PERF_FORMAT_GROUP) &&
1998            evsel__is_group_leader(evsel)) {
1999         perf_missing_features.group_read = true;
2000         pr_debug2_peo("switching off group read\n");
2001         return true;
2002     } else {
2003         return false;
2004     }
2005 }
2006 
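     /*
      * Try to raise RLIMIT_NOFILE, first to the hard limit and then beyond
      * it, so that opening many events does not fail with EMFILE. errno is
      * preserved across the getrlimit()/setrlimit() calls.
      */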
2007 bool evsel__increase_rlimit(enum rlimit_action *set_rlimit)
2008 {
2009     int old_errno;
2010     struct rlimit l;
2011 
2012     if (*set_rlimit < INCREASED_MAX) {
2013         old_errno = errno;
2014 
2015         if (getrlimit(RLIMIT_NOFILE, &l) == 0) {
2016             if (*set_rlimit == NO_CHANGE) {
2017                 l.rlim_cur = l.rlim_max;
2018             } else {
2019                 l.rlim_cur = l.rlim_max + 1000;
2020                 l.rlim_max = l.rlim_cur;
2021             }
2022             if (setrlimit(RLIMIT_NOFILE, &l) == 0) {
2023                 (*set_rlimit) += 1;
2024                 errno = old_errno;
2025                 return true;
2026             }
2027         }
2028         errno = old_errno;
2029     }
2030 
2031     return false;
2032 }
2033 
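     /*
      * Open one event fd per (cpu, thread) in the given CPU map index range,
      * installing BPF counters/programs on each fd. On failure, fall back by
      * disabling missing features, dropping exited threads, lowering
      * precise_ip or raising the fd limit, and retry.
      */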
2034 static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
2035         struct perf_thread_map *threads,
2036         int start_cpu_map_idx, int end_cpu_map_idx)
2037 {
2038     int idx, thread, nthreads;
2039     int pid = -1, err, old_errno;
2040     enum rlimit_action set_rlimit = NO_CHANGE;
2041 
2042     err = __evsel__prepare_open(evsel, cpus, threads);
2043     if (err)
2044         return err;
2045 
2046     if (cpus == NULL)
2047         cpus = empty_cpu_map;
2048 
2049     if (threads == NULL)
2050         threads = empty_thread_map;
2051 
2052     if (evsel->core.system_wide)
2053         nthreads = 1;
2054     else
2055         nthreads = threads->nr;
2056 
2057     if (evsel->cgrp)
2058         pid = evsel->cgrp->fd;
2059 
2060 fallback_missing_features:
2061     evsel__disable_missing_features(evsel);
2062 
2063     display_attr(&evsel->core.attr);
2064 
2065     for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
2066 
2067         for (thread = 0; thread < nthreads; thread++) {
2068             int fd, group_fd;
2069 retry_open:
2070             if (thread >= nthreads)
2071                 break;
2072 
2073             if (!evsel->cgrp && !evsel->core.system_wide)
2074                 pid = perf_thread_map__pid(threads, thread);
2075 
2076             group_fd = get_group_fd(evsel, idx, thread);
2077 
2078             test_attr__ready();
2079 
2080             pr_debug2_peo("sys_perf_event_open: pid %d  cpu %d  group_fd %d  flags %#lx",
2081                 pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags);
2082 
2083             fd = sys_perf_event_open(&evsel->core.attr, pid,
2084                         perf_cpu_map__cpu(cpus, idx).cpu,
2085                         group_fd, evsel->open_flags);
2086 
2087             FD(evsel, idx, thread) = fd;
2088 
2089             if (fd < 0) {
2090                 err = -errno;
2091 
2092                 pr_debug2_peo("\nsys_perf_event_open failed, error %d\n",
2093                       err);
2094                 goto try_fallback;
2095             }
2096 
2097             bpf_counter__install_pe(evsel, idx, fd);
2098 
2099             if (unlikely(test_attr__enabled)) {
2100                 test_attr__open(&evsel->core.attr, pid,
2101                         perf_cpu_map__cpu(cpus, idx),
2102                         fd, group_fd, evsel->open_flags);
2103             }
2104 
2105             pr_debug2_peo(" = %d\n", fd);
2106 
2107             if (evsel->bpf_fd >= 0) {
2108                 int evt_fd = fd;
2109                 int bpf_fd = evsel->bpf_fd;
2110 
2111                 err = ioctl(evt_fd,
2112                         PERF_EVENT_IOC_SET_BPF,
2113                         bpf_fd);
2114                 if (err && errno != EEXIST) {
2115                     pr_err("failed to attach bpf fd %d: %s\n",
2116                            bpf_fd, strerror(errno));
2117                     err = -EINVAL;
2118                     goto out_close;
2119                 }
2120             }
2121 
2122             set_rlimit = NO_CHANGE;
2123 
2124             /*
2125              * If we succeeded but had to kill clockid, fail and
2126              * have evsel__open_strerror() print us a nice error.
2127              */
2128             if (perf_missing_features.clockid ||
2129                 perf_missing_features.clockid_wrong) {
2130                 err = -EINVAL;
2131                 goto out_close;
2132             }
2133         }
2134     }
2135 
2136     return 0;
2137 
2138 try_fallback:
2139     if (evsel__precise_ip_fallback(evsel))
2140         goto retry_open;
2141 
2142     if (evsel__ignore_missing_thread(evsel, perf_cpu_map__nr(cpus),
2143                      idx, threads, thread, err)) {
2144         /* We just removed 1 thread, so lower the upper nthreads limit. */
2145         nthreads--;
2146 
2147             /* ... and pretend like nothing has happened. */
2148         err = 0;
2149         goto retry_open;
2150     }
2151     /*
2152      * perf stat needs between 5 and 22 fds per CPU. When we run out
2153      * of them, try to increase the limits.
2154      */
2155     if (err == -EMFILE && evsel__increase_rlimit(&set_rlimit))
2156         goto retry_open;
2157 
2158     if (err != -EINVAL || idx > 0 || thread > 0)
2159         goto out_close;
2160 
2161     if (evsel__detect_missing_features(evsel))
2162         goto fallback_missing_features;
2163 out_close:
2164     if (err)
2165         threads->err_thread = thread;
2166 
2167     old_errno = errno;
2168     do {
2169         while (--thread >= 0) {
2170             if (FD(evsel, idx, thread) >= 0)
2171                 close(FD(evsel, idx, thread));
2172             FD(evsel, idx, thread) = -1;
2173         }
2174         thread = nthreads;
2175     } while (--idx >= 0);
2176     errno = old_errno;
2177     return err;
2178 }
2179 
2180 int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
2181         struct perf_thread_map *threads)
2182 {
2183     return evsel__open_cpu(evsel, cpus, threads, 0, perf_cpu_map__nr(cpus));
2184 }
2185 
2186 void evsel__close(struct evsel *evsel)
2187 {
2188     perf_evsel__close(&evsel->core);
2189     perf_evsel__free_id(&evsel->core);
2190 }
2191 
2192 int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx)
2193 {
2194     if (cpu_map_idx == -1)
2195         return evsel__open_cpu(evsel, cpus, NULL, 0, perf_cpu_map__nr(cpus));
2196 
2197     return evsel__open_cpu(evsel, cpus, NULL, cpu_map_idx, cpu_map_idx + 1);
2198 }
2199 
2200 int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads)
2201 {
2202     return evsel__open(evsel, NULL, threads);
2203 }
2204 
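     /*
      * Parse the sample_id_all trailer appended to non-SAMPLE records,
      * walking the id fields backwards from the end of the event.
      */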
2205 static int perf_evsel__parse_id_sample(const struct evsel *evsel,
2206                        const union perf_event *event,
2207                        struct perf_sample *sample)
2208 {
2209     u64 type = evsel->core.attr.sample_type;
2210     const __u64 *array = event->sample.array;
2211     bool swapped = evsel->needs_swap;
2212     union u64_swap u;
2213 
2214     array += ((event->header.size -
2215            sizeof(event->header)) / sizeof(u64)) - 1;
2216 
2217     if (type & PERF_SAMPLE_IDENTIFIER) {
2218         sample->id = *array;
2219         array--;
2220     }
2221 
2222     if (type & PERF_SAMPLE_CPU) {
2223         u.val64 = *array;
2224         if (swapped) {
2225             /* undo swap of u64, then swap on individual u32s */
2226             u.val64 = bswap_64(u.val64);
2227             u.val32[0] = bswap_32(u.val32[0]);
2228         }
2229 
2230         sample->cpu = u.val32[0];
2231         array--;
2232     }
2233 
2234     if (type & PERF_SAMPLE_STREAM_ID) {
2235         sample->stream_id = *array;
2236         array--;
2237     }
2238 
2239     if (type & PERF_SAMPLE_ID) {
2240         sample->id = *array;
2241         array--;
2242     }
2243 
2244     if (type & PERF_SAMPLE_TIME) {
2245         sample->time = *array;
2246         array--;
2247     }
2248 
2249     if (type & PERF_SAMPLE_TID) {
2250         u.val64 = *array;
2251         if (swapped) {
2252             /* undo swap of u64, then swap on individual u32s */
2253             u.val64 = bswap_64(u.val64);
2254             u.val32[0] = bswap_32(u.val32[0]);
2255             u.val32[1] = bswap_32(u.val32[1]);
2256         }
2257 
2258         sample->pid = u.val32[0];
2259         sample->tid = u.val32[1];
2260         array--;
2261     }
2262 
2263     return 0;
2264 }
2265 
2266 static inline bool overflow(const void *endp, u16 max_size, const void *offset,
2267                 u64 size)
2268 {
2269     return size > max_size || offset + size > endp;
2270 }
2271 
2272 #define OVERFLOW_CHECK(offset, size, max_size)              \
2273     do {                                \
2274         if (overflow(endp, (max_size), (offset), (size)))   \
2275             return -EFAULT;                 \
2276     } while (0)
2277 
2278 #define OVERFLOW_CHECK_u64(offset) \
2279     OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64))
2280 
2281 static int
2282 perf_event__check_size(union perf_event *event, unsigned int sample_size)
2283 {
2284     /*
2285      * The evsel's sample_size is based on PERF_SAMPLE_MASK, which includes
2286      * fields up to PERF_SAMPLE_PERIOD.  After that, overflow() must be used
2287      * to check that the format does not go past the end of the event.
2288      */
2289     if (sample_size + sizeof(event->header) > event->header.size)
2290         return -EFAULT;
2291 
2292     return 0;
2293 }
2294 
2295 void __weak arch_perf_parse_sample_weight(struct perf_sample *data,
2296                       const __u64 *array,
2297                       u64 type __maybe_unused)
2298 {
2299     data->weight = *array;
2300 }
2301 
2302 u64 evsel__bitfield_swap_branch_flags(u64 value)
2303 {
2304     u64 new_val = 0;
2305 
2306     /*
2307      * branch_flags
2308      * union {
2309      *  u64 values;
2310      *  struct {
2311      *      mispred:1   //target mispredicted
2312      *      predicted:1 //target predicted
2313      *      in_tx:1     //in transaction
2314      *      abort:1     //transaction abort
2315      *      cycles:16   //cycle count to last branch
2316      *      type:4      //branch type
2317      *      reserved:40
2318      *  }
2319      * }
2320      *
2321      * Avoid calling bswap64() on the entire branch_flag.value,
2322      * as it has variable bit-field sizes. Instead, the
2323      * bitfield_swap() macro takes the bit-field position/size
2324      * and swaps it based on the host endianness.
2325      *
2326      * tep_is_bigendian() is used here instead of
2327      * bigendian() to avoid Python test failures.
2328      */
2329     if (tep_is_bigendian()) {
2330         new_val = bitfield_swap(value, 0, 1);
2331         new_val |= bitfield_swap(value, 1, 1);
2332         new_val |= bitfield_swap(value, 2, 1);
2333         new_val |= bitfield_swap(value, 3, 1);
2334         new_val |= bitfield_swap(value, 4, 16);
2335         new_val |= bitfield_swap(value, 20, 4);
2336         new_val |= bitfield_swap(value, 24, 40);
2337     } else {
2338         new_val = bitfield_swap(value, 63, 1);
2339         new_val |= bitfield_swap(value, 62, 1);
2340         new_val |= bitfield_swap(value, 61, 1);
2341         new_val |= bitfield_swap(value, 60, 1);
2342         new_val |= bitfield_swap(value, 44, 16);
2343         new_val |= bitfield_swap(value, 40, 4);
2344         new_val |= bitfield_swap(value, 0, 40);
2345     }
2346 
2347     return new_val;
2348 }
2349 
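     /*
      * Decode a PERF_RECORD_SAMPLE into struct perf_sample, consuming the
      * fields in the order given by attr.sample_type and bounds checking
      * every variable-sized field against the event size.
      */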
2350 int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
2351             struct perf_sample *data)
2352 {
2353     u64 type = evsel->core.attr.sample_type;
2354     bool swapped = evsel->needs_swap;
2355     const __u64 *array;
2356     u16 max_size = event->header.size;
2357     const void *endp = (void *)event + max_size;
2358     u64 sz;
2359 
2360     /*
2361      * used for cross-endian analysis. See git commit 65014ab3
2362      * for why this goofiness is needed.
2363      */
2364     union u64_swap u;
2365 
2366     memset(data, 0, sizeof(*data));
2367     data->cpu = data->pid = data->tid = -1;
2368     data->stream_id = data->id = data->time = -1ULL;
2369     data->period = evsel->core.attr.sample_period;
2370     data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2371     data->misc    = event->header.misc;
2372     data->id = -1ULL;
2373     data->data_src = PERF_MEM_DATA_SRC_NONE;
2374     data->vcpu = -1;
2375 
2376     if (event->header.type != PERF_RECORD_SAMPLE) {
2377         if (!evsel->core.attr.sample_id_all)
2378             return 0;
2379         return perf_evsel__parse_id_sample(evsel, event, data);
2380     }
2381 
2382     array = event->sample.array;
2383 
2384     if (perf_event__check_size(event, evsel->sample_size))
2385         return -EFAULT;
2386 
2387     if (type & PERF_SAMPLE_IDENTIFIER) {
2388         data->id = *array;
2389         array++;
2390     }
2391 
2392     if (type & PERF_SAMPLE_IP) {
2393         data->ip = *array;
2394         array++;
2395     }
2396 
2397     if (type & PERF_SAMPLE_TID) {
2398         u.val64 = *array;
2399         if (swapped) {
2400             /* undo swap of u64, then swap on individual u32s */
2401             u.val64 = bswap_64(u.val64);
2402             u.val32[0] = bswap_32(u.val32[0]);
2403             u.val32[1] = bswap_32(u.val32[1]);
2404         }
2405 
2406         data->pid = u.val32[0];
2407         data->tid = u.val32[1];
2408         array++;
2409     }
2410 
2411     if (type & PERF_SAMPLE_TIME) {
2412         data->time = *array;
2413         array++;
2414     }
2415 
2416     if (type & PERF_SAMPLE_ADDR) {
2417         data->addr = *array;
2418         array++;
2419     }
2420 
2421     if (type & PERF_SAMPLE_ID) {
2422         data->id = *array;
2423         array++;
2424     }
2425 
2426     if (type & PERF_SAMPLE_STREAM_ID) {
2427         data->stream_id = *array;
2428         array++;
2429     }
2430 
2431     if (type & PERF_SAMPLE_CPU) {
2432 
2433         u.val64 = *array;
2434         if (swapped) {
2435             /* undo swap of u64, then swap on individual u32s */
2436             u.val64 = bswap_64(u.val64);
2437             u.val32[0] = bswap_32(u.val32[0]);
2438         }
2439 
2440         data->cpu = u.val32[0];
2441         array++;
2442     }
2443 
2444     if (type & PERF_SAMPLE_PERIOD) {
2445         data->period = *array;
2446         array++;
2447     }
2448 
2449     if (type & PERF_SAMPLE_READ) {
2450         u64 read_format = evsel->core.attr.read_format;
2451 
2452         OVERFLOW_CHECK_u64(array);
2453         if (read_format & PERF_FORMAT_GROUP)
2454             data->read.group.nr = *array;
2455         else
2456             data->read.one.value = *array;
2457 
2458         array++;
2459 
2460         if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
2461             OVERFLOW_CHECK_u64(array);
2462             data->read.time_enabled = *array;
2463             array++;
2464         }
2465 
2466         if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
2467             OVERFLOW_CHECK_u64(array);
2468             data->read.time_running = *array;
2469             array++;
2470         }
2471 
2472         /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
2473         if (read_format & PERF_FORMAT_GROUP) {
2474             const u64 max_group_nr = UINT64_MAX /
2475                     sizeof(struct sample_read_value);
2476 
2477             if (data->read.group.nr > max_group_nr)
2478                 return -EFAULT;
2479 
2480             sz = data->read.group.nr * sample_read_value_size(read_format);
2481             OVERFLOW_CHECK(array, sz, max_size);
2482             data->read.group.values =
2483                     (struct sample_read_value *)array;
2484             array = (void *)array + sz;
2485         } else {
2486             OVERFLOW_CHECK_u64(array);
2487             data->read.one.id = *array;
2488             array++;
2489 
2490             if (read_format & PERF_FORMAT_LOST) {
2491                 OVERFLOW_CHECK_u64(array);
2492                 data->read.one.lost = *array;
2493                 array++;
2494             }
2495         }
2496     }
2497 
2498     if (type & PERF_SAMPLE_CALLCHAIN) {
2499         const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
2500 
2501         OVERFLOW_CHECK_u64(array);
2502         data->callchain = (struct ip_callchain *)array++;
2503         if (data->callchain->nr > max_callchain_nr)
2504             return -EFAULT;
2505         sz = data->callchain->nr * sizeof(u64);
2506         OVERFLOW_CHECK(array, sz, max_size);
2507         array = (void *)array + sz;
2508     }
2509 
2510     if (type & PERF_SAMPLE_RAW) {
2511         OVERFLOW_CHECK_u64(array);
2512         u.val64 = *array;
2513 
2514         /*
2515          * Undo the swap of the u64, then swap the individual u32s
2516          * to get the size of the raw area, and undo all of the
2517          * swap. The pevent interface handles endianness by
2518          * itself.
2519          */
2520         if (swapped) {
2521             u.val64 = bswap_64(u.val64);
2522             u.val32[0] = bswap_32(u.val32[0]);
2523             u.val32[1] = bswap_32(u.val32[1]);
2524         }
2525         data->raw_size = u.val32[0];
2526 
2527         /*
2528          * The raw data is aligned on 64bits including the
2529          * u32 size, so it's safe to use mem_bswap_64.
2530          */
2531         if (swapped)
2532             mem_bswap_64((void *) array, data->raw_size);
2533 
2534         array = (void *)array + sizeof(u32);
2535 
2536         OVERFLOW_CHECK(array, data->raw_size, max_size);
2537         data->raw_data = (void *)array;
2538         array = (void *)array + data->raw_size;
2539     }
2540 
2541     if (type & PERF_SAMPLE_BRANCH_STACK) {
2542         const u64 max_branch_nr = UINT64_MAX /
2543                       sizeof(struct branch_entry);
2544         struct branch_entry *e;
2545         unsigned int i;
2546 
2547         OVERFLOW_CHECK_u64(array);
2548         data->branch_stack = (struct branch_stack *)array++;
2549 
2550         if (data->branch_stack->nr > max_branch_nr)
2551             return -EFAULT;
2552 
2553         sz = data->branch_stack->nr * sizeof(struct branch_entry);
2554         if (evsel__has_branch_hw_idx(evsel)) {
2555             sz += sizeof(u64);
2556             e = &data->branch_stack->entries[0];
2557         } else {
2558             data->no_hw_idx = true;
2559             /*
2560              * If PERF_SAMPLE_BRANCH_HW_INDEX is not applied,
2561              * only nr and entries[] will be output by the kernel.
2562              */
2563             e = (struct branch_entry *)&data->branch_stack->hw_idx;
2564         }
2565 
2566         if (swapped) {
2567             /*
2568              * struct branch_flag does not have an endian-
2569              * specific bit-field definition, and bswap
2570              * will not resolve the issue, since these
2571              * are bit fields.
2572              *
2573              * evsel__bitfield_swap_branch_flags() uses the
2574              * bitfield_swap() macro to swap the bit positions
2575              * based on the host endianness.
2576              */
2577             for (i = 0; i < data->branch_stack->nr; i++, e++)
2578                 e->flags.value = evsel__bitfield_swap_branch_flags(e->flags.value);
2579         }
2580 
2581         OVERFLOW_CHECK(array, sz, max_size);
2582         array = (void *)array + sz;
2583     }
2584 
2585     if (type & PERF_SAMPLE_REGS_USER) {
2586         OVERFLOW_CHECK_u64(array);
2587         data->user_regs.abi = *array;
2588         array++;
2589 
2590         if (data->user_regs.abi) {
2591             u64 mask = evsel->core.attr.sample_regs_user;
2592 
2593             sz = hweight64(mask) * sizeof(u64);
2594             OVERFLOW_CHECK(array, sz, max_size);
2595             data->user_regs.mask = mask;
2596             data->user_regs.regs = (u64 *)array;
2597             array = (void *)array + sz;
2598         }
2599     }
2600 
2601     if (type & PERF_SAMPLE_STACK_USER) {
2602         OVERFLOW_CHECK_u64(array);
2603         sz = *array++;
2604 
2605         data->user_stack.offset = ((char *)(array - 1)
2606                       - (char *) event);
2607 
2608         if (!sz) {
2609             data->user_stack.size = 0;
2610         } else {
2611             OVERFLOW_CHECK(array, sz, max_size);
2612             data->user_stack.data = (char *)array;
2613             array = (void *)array + sz;
2614             OVERFLOW_CHECK_u64(array);
2615             data->user_stack.size = *array++;
2616             if (WARN_ONCE(data->user_stack.size > sz,
2617                       "user stack dump failure\n"))
2618                 return -EFAULT;
2619         }
2620     }
2621 
2622     if (type & PERF_SAMPLE_WEIGHT_TYPE) {
2623         OVERFLOW_CHECK_u64(array);
2624         arch_perf_parse_sample_weight(data, array, type);
2625         array++;
2626     }
2627 
2628     if (type & PERF_SAMPLE_DATA_SRC) {
2629         OVERFLOW_CHECK_u64(array);
2630         data->data_src = *array;
2631         array++;
2632     }
2633 
2634     if (type & PERF_SAMPLE_TRANSACTION) {
2635         OVERFLOW_CHECK_u64(array);
2636         data->transaction = *array;
2637         array++;
2638     }
2639 
2640     data->intr_regs.abi = PERF_SAMPLE_REGS_ABI_NONE;
2641     if (type & PERF_SAMPLE_REGS_INTR) {
2642         OVERFLOW_CHECK_u64(array);
2643         data->intr_regs.abi = *array;
2644         array++;
2645 
2646         if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) {
2647             u64 mask = evsel->core.attr.sample_regs_intr;
2648 
2649             sz = hweight64(mask) * sizeof(u64);
2650             OVERFLOW_CHECK(array, sz, max_size);
2651             data->intr_regs.mask = mask;
2652             data->intr_regs.regs = (u64 *)array;
2653             array = (void *)array + sz;
2654         }
2655     }
2656 
2657     data->phys_addr = 0;
2658     if (type & PERF_SAMPLE_PHYS_ADDR) {
2659         data->phys_addr = *array;
2660         array++;
2661     }
2662 
2663     data->cgroup = 0;
2664     if (type & PERF_SAMPLE_CGROUP) {
2665         data->cgroup = *array;
2666         array++;
2667     }
2668 
2669     data->data_page_size = 0;
2670     if (type & PERF_SAMPLE_DATA_PAGE_SIZE) {
2671         data->data_page_size = *array;
2672         array++;
2673     }
2674 
2675     data->code_page_size = 0;
2676     if (type & PERF_SAMPLE_CODE_PAGE_SIZE) {
2677         data->code_page_size = *array;
2678         array++;
2679     }
2680 
2681     if (type & PERF_SAMPLE_AUX) {
2682         OVERFLOW_CHECK_u64(array);
2683         sz = *array++;
2684 
2685         OVERFLOW_CHECK(array, sz, max_size);
2686         /* Undo swap of data */
2687         if (swapped)
2688             mem_bswap_64((char *)array, sz);
2689         data->aux_sample.size = sz;
2690         data->aux_sample.data = (char *)array;
2691         array = (void *)array + sz;
2692     }
2693 
2694     return 0;
2695 }
2696 
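     /*
      * Fast path that extracts only the timestamp, either from the leading
      * fields of a sample or from the sample_id_all trailer of other records.
      */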
2697 int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event,
2698                   u64 *timestamp)
2699 {
2700     u64 type = evsel->core.attr.sample_type;
2701     const __u64 *array;
2702 
2703     if (!(type & PERF_SAMPLE_TIME))
2704         return -1;
2705 
2706     if (event->header.type != PERF_RECORD_SAMPLE) {
2707         struct perf_sample data = {
2708             .time = -1ULL,
2709         };
2710 
2711         if (!evsel->core.attr.sample_id_all)
2712             return -1;
2713         if (perf_evsel__parse_id_sample(evsel, event, &data))
2714             return -1;
2715 
2716         *timestamp = data.time;
2717         return 0;
2718     }
2719 
2720     array = event->sample.array;
2721 
2722     if (perf_event__check_size(event, evsel->sample_size))
2723         return -EFAULT;
2724 
2725     if (type & PERF_SAMPLE_IDENTIFIER)
2726         array++;
2727 
2728     if (type & PERF_SAMPLE_IP)
2729         array++;
2730 
2731     if (type & PERF_SAMPLE_TID)
2732         array++;
2733 
2734     if (type & PERF_SAMPLE_TIME)
2735         *timestamp = *array;
2736 
2737     return 0;
2738 }
2739 
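     /*
      * Size of the sample_id_all trailer appended to non-sample records:
      * one u64 per id field selected in sample_type.
      */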
2740 u16 evsel__id_hdr_size(struct evsel *evsel)
2741 {
2742     u64 sample_type = evsel->core.attr.sample_type;
2743     u16 size = 0;
2744 
2745     if (sample_type & PERF_SAMPLE_TID)
2746         size += sizeof(u64);
2747 
2748     if (sample_type & PERF_SAMPLE_TIME)
2749         size += sizeof(u64);
2750 
2751     if (sample_type & PERF_SAMPLE_ID)
2752         size += sizeof(u64);
2753 
2754     if (sample_type & PERF_SAMPLE_STREAM_ID)
2755         size += sizeof(u64);
2756 
2757     if (sample_type & PERF_SAMPLE_CPU)
2758         size += sizeof(u64);
2759 
2760     if (sample_type & PERF_SAMPLE_IDENTIFIER)
2761         size += sizeof(u64);
2762 
2763     return size;
2764 }
2765 
2766 struct tep_format_field *evsel__field(struct evsel *evsel, const char *name)
2767 {
2768     return tep_find_field(evsel->tp_format, name);
2769 }
2770 
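     /*
      * Return a pointer into the raw tracepoint payload for the named field,
      * resolving dynamic (and relative) field offsets.
      */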
2771 void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name)
2772 {
2773     struct tep_format_field *field = evsel__field(evsel, name);
2774     int offset;
2775 
2776     if (!field)
2777         return NULL;
2778 
2779     offset = field->offset;
2780 
2781     if (field->flags & TEP_FIELD_IS_DYNAMIC) {
2782         offset = *(int *)(sample->raw_data + field->offset);
2783         offset &= 0xffff;
2784         if (field->flags & TEP_FIELD_IS_RELATIVE)
2785             offset += field->offset + field->size;
2786     }
2787 
2788     return sample->raw_data + offset;
2789 }
2790 
2791 u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample,
2792              bool needs_swap)
2793 {
2794     u64 value;
2795     void *ptr = sample->raw_data + field->offset;
2796 
2797     switch (field->size) {
2798     case 1:
2799         return *(u8 *)ptr;
2800     case 2:
2801         value = *(u16 *)ptr;
2802         break;
2803     case 4:
2804         value = *(u32 *)ptr;
2805         break;
2806     case 8:
2807         memcpy(&value, ptr, sizeof(u64));
2808         break;
2809     default:
2810         return 0;
2811     }
2812 
2813     if (!needs_swap)
2814         return value;
2815 
2816     switch (field->size) {
2817     case 2:
2818         return bswap_16(value);
2819     case 4:
2820         return bswap_32(value);
2821     case 8:
2822         return bswap_64(value);
2823     default:
2824         return 0;
2825     }
2826 
2827     return 0;
2828 }
2829 
2830 u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *name)
2831 {
2832     struct tep_format_field *field = evsel__field(evsel, name);
2833 
2834     if (!field)
2835         return 0;
2836 
2837     return format_field__intval(field, sample, evsel->needs_swap);
2838 }
2839 
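     /*
      * Try a weaker configuration after a failed open: cycles falls back to
      * the cpu-clock software event, and EACCES with perf_event_paranoid > 1
      * falls back to excluding kernel and hypervisor samples (appending the
      * 'u' modifier to the event name). Returns true when a retry makes sense.
      */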
2840 bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize)
2841 {
2842     int paranoid;
2843 
2844     if ((err == ENOENT || err == ENXIO || err == ENODEV) &&
2845         evsel->core.attr.type   == PERF_TYPE_HARDWARE &&
2846         evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES) {
2847         /*
2848          * If it's cycles, then fall back to the hrtimer-based
2849          * cpu-clock-tick sw counter, which is always available even
2850          * without PMU support.
2851          *
2852          * PPC returns ENXIO until 2.6.37 (behavior changed with commit
2853          * b0a873e).
2854          */
2855         scnprintf(msg, msgsize, "%s",
2856 "The cycles event is not supported, trying to fall back to cpu-clock-ticks");
2857 
2858         evsel->core.attr.type   = PERF_TYPE_SOFTWARE;
2859         evsel->core.attr.config = PERF_COUNT_SW_CPU_CLOCK;
2860 
2861         zfree(&evsel->name);
2862         return true;
2863     } else if (err == EACCES && !evsel->core.attr.exclude_kernel &&
2864            (paranoid = perf_event_paranoid()) > 1) {
2865         const char *name = evsel__name(evsel);
2866         char *new_name;
2867         const char *sep = ":";
2868 
2869         /* If the event has exclude_user set then don't exclude the kernel. */
2870         if (evsel->core.attr.exclude_user)
2871             return false;
2872 
2873         /* Is the separator already in the name? */
2874         if (strchr(name, '/') ||
2875             (strchr(name, ':') && !evsel->is_libpfm_event))
2876             sep = "";
2877 
2878         if (asprintf(&new_name, "%s%su", name, sep) < 0)
2879             return false;
2880 
2881         if (evsel->name)
2882             free(evsel->name);
2883         evsel->name = new_name;
2884         scnprintf(msg, msgsize, "kernel.perf_event_paranoid=%d, trying "
2885               "to fall back to excluding kernel and hypervisor "
2886               " samples", paranoid);
2887         evsel->core.attr.exclude_kernel = 1;
2888         evsel->core.attr.exclude_hv     = 1;
2889 
2890         return true;
2891     }
2892 
2893     return false;
2894 }
2895 
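     /*
      * Return true if a process whose comm starts with @name is found under
      * /proc (used to detect a running oprofile daemon).
      */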
2896 static bool find_process(const char *name)
2897 {
2898     size_t len = strlen(name);
2899     DIR *dir;
2900     struct dirent *d;
2901     int ret = -1;
2902 
2903     dir = opendir(procfs__mountpoint());
2904     if (!dir)
2905         return false;
2906 
2907     /* Walk through the directory. */
2908     while (ret && (d = readdir(dir)) != NULL) {
2909         char path[PATH_MAX];
2910         char *data;
2911         size_t size;
2912 
2913         if ((d->d_type != DT_DIR) ||
2914              !strcmp(".", d->d_name) ||
2915              !strcmp("..", d->d_name))
2916             continue;
2917 
2918         scnprintf(path, sizeof(path), "%s/%s/comm",
2919               procfs__mountpoint(), d->d_name);
2920 
2921         if (filename__read_str(path, &data, &size))
2922             continue;
2923 
2924         ret = strncmp(name, data, len);
2925         free(data);
2926     }
2927 
2928     closedir(dir);
2929     return ret ? false : true;
2930 }
2931 
2932 static bool is_amd(const char *arch, const char *cpuid)
2933 {
2934     return arch && !strcmp("x86", arch) && cpuid && strstarts(cpuid, "AuthenticAMD");
2935 }
2936 
2937 static bool is_amd_ibs(struct evsel *evsel)
2938 {
2939     return evsel->core.attr.precise_ip
2940         || (evsel->pmu_name && !strncmp(evsel->pmu_name, "ibs", 3));
2941 }
2942 
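     /*
      * Build a human-readable explanation for a sys_perf_event_open() error,
      * with hints specific to the errno and the requested attributes.
      */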
2943 int evsel__open_strerror(struct evsel *evsel, struct target *target,
2944              int err, char *msg, size_t size)
2945 {
2946     struct perf_env *env = evsel__env(evsel);
2947     const char *arch = perf_env__arch(env);
2948     const char *cpuid = perf_env__cpuid(env);
2949     char sbuf[STRERR_BUFSIZE];
2950     int printed = 0, enforced = 0;
2951 
2952     switch (err) {
2953     case EPERM:
2954     case EACCES:
2955         printed += scnprintf(msg + printed, size - printed,
2956             "Access to performance monitoring and observability operations is limited.\n");
2957 
2958         if (!sysfs__read_int("fs/selinux/enforce", &enforced)) {
2959             if (enforced) {
2960                 printed += scnprintf(msg + printed, size - printed,
2961                     "Enforced MAC policy settings (SELinux) can limit access to performance\n"
2962                     "monitoring and observability operations. Inspect system audit records for\n"
2963                     "more perf_event access control information and adjusting the policy.\n");
2964             }
2965         }
2966 
2967         if (err == EPERM)
2968             printed += scnprintf(msg, size,
2969                 "No permission to enable %s event.\n\n", evsel__name(evsel));
2970 
2971         return scnprintf(msg + printed, size - printed,
2972          "Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n"
2973          "access to performance monitoring and observability operations for processes\n"
2974          "without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability.\n"
2975          "More information can be found at 'Perf events and tool security' document:\n"
2976          "https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html\n"
2977          "perf_event_paranoid setting is %d:\n"
2978          "  -1: Allow use of (almost) all events by all users\n"
2979          "      Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n"
2980          ">= 0: Disallow raw and ftrace function tracepoint access\n"
2981          ">= 1: Disallow CPU event access\n"
2982          ">= 2: Disallow kernel profiling\n"
2983          "To make the adjusted perf_event_paranoid setting permanent preserve it\n"
2984          "in /etc/sysctl.conf (e.g. kernel.perf_event_paranoid = <setting>)",
2985          perf_event_paranoid());
2986     case ENOENT:
2987         return scnprintf(msg, size, "The %s event is not supported.", evsel__name(evsel));
2988     case EMFILE:
2989         return scnprintf(msg, size, "%s",
2990              "Too many events are opened.\n"
2991              "Probably the maximum number of open file descriptors has been reached.\n"
2992              "Hint: Try again after reducing the number of events.\n"
2993              "Hint: Try increasing the limit with 'ulimit -n <limit>'");
2994     case ENOMEM:
2995         if (evsel__has_callchain(evsel) &&
2996             access("/proc/sys/kernel/perf_event_max_stack", F_OK) == 0)
2997             return scnprintf(msg, size,
2998                      "Not enough memory to setup event with callchain.\n"
2999                      "Hint: Try tweaking /proc/sys/kernel/perf_event_max_stack\n"
3000                      "Hint: Current value: %d", sysctl__max_stack());
3001         break;
3002     case ENODEV:
3003         if (target->cpu_list)
3004             return scnprintf(msg, size, "%s",
3005      "No such device - did you specify an out-of-range profile CPU?");
3006         break;
3007     case EOPNOTSUPP:
3008         if (evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK)
3009             return scnprintf(msg, size,
3010     "%s: PMU Hardware or event type doesn't support branch stack sampling.",
3011                      evsel__name(evsel));
3012         if (evsel->core.attr.aux_output)
3013             return scnprintf(msg, size,
3014     "%s: PMU Hardware doesn't support 'aux_output' feature",
3015                      evsel__name(evsel));
3016         if (evsel->core.attr.sample_period != 0)
3017             return scnprintf(msg, size,
3018     "%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'",
3019                      evsel__name(evsel));
3020         if (evsel->core.attr.precise_ip)
3021             return scnprintf(msg, size, "%s",
3022     "\'precise\' request may not be supported. Try removing 'p' modifier.");
3023 #if defined(__i386__) || defined(__x86_64__)
3024         if (evsel->core.attr.type == PERF_TYPE_HARDWARE)
3025             return scnprintf(msg, size, "%s",
3026     "No hardware sampling interrupt available.\n");
3027 #endif
3028         break;
3029     case EBUSY:
3030         if (find_process("oprofiled"))
3031             return scnprintf(msg, size,
3032     "The PMU counters are busy/taken by another profiler.\n"
3033     "We found oprofile daemon running, please stop it and try again.");
3034         break;
3035     case EINVAL:
3036         if (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE && perf_missing_features.code_page_size)
3037             return scnprintf(msg, size, "Asking for the code page size isn't supported by this kernel.");
3038         if (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE && perf_missing_features.data_page_size)
3039             return scnprintf(msg, size, "Asking for the data page size isn't supported by this kernel.");
3040         if (evsel->core.attr.write_backward && perf_missing_features.write_backward)
3041             return scnprintf(msg, size, "Reading from overwrite event is not supported by this kernel.");
3042         if (perf_missing_features.clockid)
3043             return scnprintf(msg, size, "clockid feature not supported.");
3044         if (perf_missing_features.clockid_wrong)
3045             return scnprintf(msg, size, "wrong clockid (%d).", clockid);
3046         if (perf_missing_features.aux_output)
3047             return scnprintf(msg, size, "The 'aux_output' feature is not supported, update the kernel.");
3048         if (!target__has_cpu(target))
3049             return scnprintf(msg, size,
3050     "Invalid event (%s) in per-thread mode, enable system wide with '-a'.",
3051                     evsel__name(evsel));
3052         if (is_amd(arch, cpuid)) {
3053             if (is_amd_ibs(evsel)) {
3054                 if (evsel->core.attr.exclude_kernel)
3055                     return scnprintf(msg, size,
3056     "AMD IBS can't exclude kernel events.  Try running at a higher privilege level.");
3057                 if (!evsel->core.system_wide)
3058                     return scnprintf(msg, size,
3059     "AMD IBS may only be available in system-wide/per-cpu mode.  Try using -a, or -C and workload affinity");
3060             }
3061         }
3062 
3063         break;
3064     case ENODATA:
3065         return scnprintf(msg, size, "Cannot collect data source with the load latency event alone. "
3066                  "Please add an auxiliary event in front of the load latency event.");
3067     default:
3068         break;
3069     }
3070 
3071     return scnprintf(msg, size,
3072     "The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n"
3073     "/bin/dmesg | grep -i perf may provide additional information.\n",
3074              err, str_error_r(err, sbuf, sizeof(sbuf)), evsel__name(evsel));
3075 }
3076 
3077 struct perf_env *evsel__env(struct evsel *evsel)
3078 {
3079     if (evsel && evsel->evlist && evsel->evlist->env)
3080         return evsel->evlist->env;
3081     return &perf_env;
3082 }
3083 
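     /*
      * Register every opened fd with the evlist via perf_evlist__id_add_fd()
      * so that samples can later be matched back to this evsel.
      */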
3084 static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist)
3085 {
3086     int cpu_map_idx, thread;
3087 
3088     for (cpu_map_idx = 0; cpu_map_idx < xyarray__max_x(evsel->core.fd); cpu_map_idx++) {
3089         for (thread = 0; thread < xyarray__max_y(evsel->core.fd);
3090              thread++) {
3091             int fd = FD(evsel, cpu_map_idx, thread);
3092 
3093             if (perf_evlist__id_add_fd(&evlist->core, &evsel->core,
3094                            cpu_map_idx, thread, fd) < 0)
3095                 return -1;
3096         }
3097     }
3098 
3099     return 0;
3100 }
3101 
3102 int evsel__store_ids(struct evsel *evsel, struct evlist *evlist)
3103 {
3104     struct perf_cpu_map *cpus = evsel->core.cpus;
3105     struct perf_thread_map *threads = evsel->core.threads;
3106 
3107     if (perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr))
3108         return -ENOMEM;
3109 
3110     return store_evsel_ids(evsel, evlist);
3111 }
3112 
3113 void evsel__zero_per_pkg(struct evsel *evsel)
3114 {
3115     struct hashmap_entry *cur;
3116     size_t bkt;
3117 
3118     if (evsel->per_pkg_mask) {
3119         hashmap__for_each_entry(evsel->per_pkg_mask, cur, bkt)
3120             free((char *)cur->key);
3121 
3122         hashmap__clear(evsel->per_pkg_mask);
3123     }
3124 }
3125 
3126 bool evsel__is_hybrid(struct evsel *evsel)
3127 {
3128     return evsel->pmu_name && perf_pmu__is_hybrid(evsel->pmu_name);
3129 }
3130 
3131 struct evsel *evsel__leader(struct evsel *evsel)
3132 {
3133     return container_of(evsel->core.leader, struct evsel, core);
3134 }
3135 
3136 bool evsel__has_leader(struct evsel *evsel, struct evsel *leader)
3137 {
3138     return evsel->core.leader == &leader->core;
3139 }
3140 
3141 bool evsel__is_leader(struct evsel *evsel)
3142 {
3143     return evsel__has_leader(evsel, evsel);
3144 }
3145 
3146 void evsel__set_leader(struct evsel *evsel, struct evsel *leader)
3147 {
3148     evsel->core.leader = &leader->core;
3149 }
3150 
3151 int evsel__source_count(const struct evsel *evsel)
3152 {
3153     struct evsel *pos;
3154     int count = 0;
3155 
3156     evlist__for_each_entry(evsel->evlist, pos) {
3157         if (pos->metric_leader == evsel)
3158             count++;
3159     }
3160     return count;
3161 }
3162 
3163 bool __weak arch_evsel__must_be_in_group(const struct evsel *evsel __maybe_unused)
3164 {
3165     return false;
3166 }
3167 
3168 /*
3169  * Remove an event from a given group (leader).
3170  * Some events, e.g., perf metrics Topdown events,
3171  * must always be grouped; such events are ignored (left in the group).
3172  */
3173 void evsel__remove_from_group(struct evsel *evsel, struct evsel *leader)
3174 {
3175     if (!arch_evsel__must_be_in_group(evsel) && evsel != leader) {
3176         evsel__set_leader(evsel, evsel);
3177         evsel->core.nr_members = 0;
3178         leader->core.nr_members--;
3179     }
3180 }