// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_pt.c: Intel Processor Trace support
 * Copyright (c) 2013-2015, Intel Corporation.
 */

#include <errno.h>
#include <stdbool.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include <linux/zalloc.h>
#include <cpuid.h>

#include "../../../util/session.h"
#include "../../../util/event.h"
#include "../../../util/evlist.h"
#include "../../../util/evsel.h"
#include "../../../util/evsel_config.h"
#include "../../../util/cpumap.h"
#include "../../../util/mmap.h"
#include <subcmd/parse-options.h>
#include "../../../util/parse-events.h"
#include "../../../util/pmu.h"
#include "../../../util/debug.h"
#include "../../../util/auxtrace.h"
#include "../../../util/perf_api_probe.h"
#include "../../../util/record.h"
#include "../../../util/target.h"
#include "../../../util/tsc.h"
#include <internal/lib.h> // page_size
#include "../../../util/intel-pt.h"

#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
#define KiB_MASK(x) (KiB(x) - 1)
#define MiB_MASK(x) (MiB(x) - 1)

#define INTEL_PT_PSB_PERIOD_NEAR    256

struct intel_pt_snapshot_ref {
    void *ref_buf;
    size_t ref_offset;
    bool wrapped;
};

struct intel_pt_recording {
    struct auxtrace_record      itr;
    struct perf_pmu         *intel_pt_pmu;
    int             have_sched_switch;
    struct evlist       *evlist;
    bool                snapshot_mode;
    bool                snapshot_init_done;
    size_t              snapshot_size;
    size_t              snapshot_ref_buf_size;
    int             snapshot_ref_cnt;
    struct intel_pt_snapshot_ref    *snapshot_refs;
    size_t              priv_size;
};

static int intel_pt_parse_terms_with_default(const char *pmu_name,
                         struct list_head *formats,
                         const char *str,
                         u64 *config)
{
    struct list_head *terms;
    struct perf_event_attr attr = { .size = 0, };
    int err;

    terms = malloc(sizeof(struct list_head));
    if (!terms)
        return -ENOMEM;

    INIT_LIST_HEAD(terms);

    err = parse_events_terms(terms, str);
    if (err)
        goto out_free;

    attr.config = *config;
    err = perf_pmu__config_terms(pmu_name, formats, &attr, terms, true,
                     NULL);
    if (err)
        goto out_free;

    *config = attr.config;
out_free:
    parse_events_terms__delete(terms);
    return err;
}

static int intel_pt_parse_terms(const char *pmu_name, struct list_head *formats,
                const char *str, u64 *config)
{
    *config = 0;
    return intel_pt_parse_terms_with_default(pmu_name, formats, str,
                         config);
}

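/*
 * Extract the bits of @bits selected by @mask and pack them into the low
 * bits of the result, preserving their relative order.  For example,
 * intel_pt_masked_bits(0xf000, 0x5000) returns 0x5.
 */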
static u64 intel_pt_masked_bits(u64 mask, u64 bits)
{
    const u64 top_bit = 1ULL << 63;
    u64 res = 0;
    int i;

    for (i = 0; i < 64; i++) {
        if (mask & top_bit) {
            res <<= 1;
            if (bits & top_bit)
                res |= 1;
        }
        mask <<= 1;
        bits <<= 1;
    }

    return res;
}

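/*
 * Read back the value of the format term named @str (e.g. "psb_period")
 * from the config of the Intel PT event in @evlist, right-justified.
 */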
static int intel_pt_read_config(struct perf_pmu *intel_pt_pmu, const char *str,
                struct evlist *evlist, u64 *res)
{
    struct evsel *evsel;
    u64 mask;

    *res = 0;

    mask = perf_pmu__format_bits(&intel_pt_pmu->format, str);
    if (!mask)
        return -EINVAL;

    evlist__for_each_entry(evlist, evsel) {
        if (evsel->core.attr.type == intel_pt_pmu->type) {
            *res = intel_pt_masked_bits(mask, evsel->core.attr.config);
            return 0;
        }
    }

    return -EINVAL;
}

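/*
 * Estimate the PSB period in bytes.  A psb_period term of n corresponds to
 * 2^(n + 11) bytes, so n = 0 gives 2 KiB and n = 3 gives 16 KiB.  Early
 * hardware (no caps/topa_multiple_entries) is assumed to emit a PSB about
 * every 256 bytes.
 */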
static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu,
                  struct evlist *evlist)
{
    u64 val;
    int err, topa_multiple_entries;
    size_t psb_period;

    if (perf_pmu__scan_file(intel_pt_pmu, "caps/topa_multiple_entries",
                "%d", &topa_multiple_entries) != 1)
        topa_multiple_entries = 0;

    /*
     * The absence of caps/topa_multiple_entries identifies early hardware,
     * which emitted PSB packets much more frequently.
     */
    if (!topa_multiple_entries) {
        psb_period = 256;
        goto out;
    }

    err = intel_pt_read_config(intel_pt_pmu, "psb_period", evlist, &val);
    if (err)
        val = 0;

    psb_period = 1 << (val + 11);
out:
    pr_debug2("%s psb_period %zu\n", intel_pt_pmu->name, psb_period);
    return psb_period;
}

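/*
 * From the set bits in @bits, pick the highest bit position that is not
 * above @target; failing that, pick the lowest set position above it.
 * For example, intel_pt_pick_bit(0x249, 3) returns 3, whereas
 * intel_pt_pick_bit(0x240, 3) returns 6.
 */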
static int intel_pt_pick_bit(int bits, int target)
{
    int pos, pick = -1;

    for (pos = 0; bits; bits >>= 1, pos++) {
        if (bits & 1) {
            if (pos <= target || pick < 0)
                pick = pos;
            if (pos >= target)
                break;
        }
    }

    return pick;
}

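/*
 * Build the default config by composing a term string from the PMU's
 * advertised capabilities and then parsing it.  With all capabilities
 * present this yields something like
 * "tsc,mtc,mtc_period=3,psb_period=3,pt,branch".
 */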
static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
{
    char buf[256];
    int mtc, mtc_periods = 0, mtc_period;
    int psb_cyc, psb_periods, psb_period;
    int pos = 0;
    u64 config;
    char c;

    pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc");

    if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc", "%d",
                &mtc) != 1)
        mtc = 1;

    if (mtc) {
        if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc_periods", "%x",
                    &mtc_periods) != 1)
            mtc_periods = 0;
        if (mtc_periods) {
            mtc_period = intel_pt_pick_bit(mtc_periods, 3);
            pos += scnprintf(buf + pos, sizeof(buf) - pos,
                     ",mtc,mtc_period=%d", mtc_period);
        }
    }

    if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_cyc", "%d",
                &psb_cyc) != 1)
        psb_cyc = 1;

    if (psb_cyc && mtc_periods) {
        if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_periods", "%x",
                    &psb_periods) != 1)
            psb_periods = 0;
        if (psb_periods) {
            psb_period = intel_pt_pick_bit(psb_periods, 3);
            pos += scnprintf(buf + pos, sizeof(buf) - pos,
                     ",psb_period=%d", psb_period);
        }
    }

    if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 &&
        perf_pmu__scan_file(intel_pt_pmu, "format/branch", "%c", &c) == 1)
        pos += scnprintf(buf + pos, sizeof(buf) - pos, ",pt,branch");

    pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf);

    intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, buf,
                 &config);

    return config;
}

static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr,
                       struct record_opts *opts,
                       const char *str)
{
    struct intel_pt_recording *ptr =
            container_of(itr, struct intel_pt_recording, itr);
    unsigned long long snapshot_size = 0;
    char *endptr;

    if (str) {
        snapshot_size = strtoull(str, &endptr, 0);
        if (*endptr || snapshot_size > SIZE_MAX)
            return -1;
    }

    opts->auxtrace_snapshot_mode = true;
    opts->auxtrace_snapshot_size = snapshot_size;

    ptr->snapshot_size = snapshot_size;

    return 0;
}

struct perf_event_attr *
intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu)
{
    struct perf_event_attr *attr;

    attr = zalloc(sizeof(struct perf_event_attr));
    if (!attr)
        return NULL;

    attr->config = intel_pt_default_config(intel_pt_pmu);

    intel_pt_pmu->selectable = true;

    return attr;
}

static const char *intel_pt_find_filter(struct evlist *evlist,
                    struct perf_pmu *intel_pt_pmu)
{
    struct evsel *evsel;

    evlist__for_each_entry(evlist, evsel) {
        if (evsel->core.attr.type == intel_pt_pmu->type)
            return evsel->filter;
    }

    return NULL;
}

static size_t intel_pt_filter_bytes(const char *filter)
{
    size_t len = filter ? strlen(filter) : 0;

    return len ? roundup(len + 1, 8) : 0;
}

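/*
 * Size of the private data in the auxtrace info event: the fixed array of
 * INTEL_PT_AUXTRACE_PRIV_MAX u64 values, the address filter string (if any)
 * padded to a multiple of 8 bytes, and one trailing u64 for the Event Trace
 * capability.
 */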
static size_t
intel_pt_info_priv_size(struct auxtrace_record *itr, struct evlist *evlist)
{
    struct intel_pt_recording *ptr =
            container_of(itr, struct intel_pt_recording, itr);
    const char *filter = intel_pt_find_filter(evlist, ptr->intel_pt_pmu);

    ptr->priv_size = (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64)) +
             intel_pt_filter_bytes(filter);
    ptr->priv_size += sizeof(u64); /* Cap Event Trace */

    return ptr->priv_size;
}

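/*
 * CPUID leaf 0x15 enumerates the ratio of the TSC frequency to the core
 * crystal clock (CTC) frequency as EBX / EAX.  Either value may be zero if
 * the ratio is not enumerated.
 */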
static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d)
{
    unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;

    __get_cpuid(0x15, &eax, &ebx, &ecx, &edx);
    *n = ebx;
    *d = eax;
}

static int intel_pt_info_fill(struct auxtrace_record *itr,
                  struct perf_session *session,
                  struct perf_record_auxtrace_info *auxtrace_info,
                  size_t priv_size)
{
    struct intel_pt_recording *ptr =
            container_of(itr, struct intel_pt_recording, itr);
    struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
    struct perf_event_mmap_page *pc;
    struct perf_tsc_conversion tc = { .time_mult = 0, };
    bool cap_user_time_zero = false, per_cpu_mmaps;
    u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit;
    u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d;
    unsigned long max_non_turbo_ratio;
    size_t filter_str_len;
    const char *filter;
    int event_trace;
    __u64 *info;
    int err;

    if (priv_size != ptr->priv_size)
        return -EINVAL;

    intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
                 "tsc", &tsc_bit);
    intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
                 "noretcomp", &noretcomp_bit);
    intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
                 "mtc", &mtc_bit);
    mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format,
                          "mtc_period");
    intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
                 "cyc", &cyc_bit);

    intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d);

    if (perf_pmu__scan_file(intel_pt_pmu, "max_nonturbo_ratio",
                "%lu", &max_non_turbo_ratio) != 1)
        max_non_turbo_ratio = 0;
    if (perf_pmu__scan_file(intel_pt_pmu, "caps/event_trace",
                "%d", &event_trace) != 1)
        event_trace = 0;

    filter = intel_pt_find_filter(session->evlist, ptr->intel_pt_pmu);
    filter_str_len = filter ? strlen(filter) : 0;

    if (!session->evlist->core.nr_mmaps)
        return -EINVAL;

    pc = session->evlist->mmap[0].core.base;
    if (pc) {
        err = perf_read_tsc_conversion(pc, &tc);
        if (err) {
            if (err != -EOPNOTSUPP)
                return err;
        } else {
            cap_user_time_zero = tc.time_mult != 0;
        }
        if (!cap_user_time_zero)
            ui__warning("Intel Processor Trace: TSC not available\n");
    }

    per_cpu_mmaps = !perf_cpu_map__empty(session->evlist->core.user_requested_cpus);

    auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
    auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
    auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift;
    auxtrace_info->priv[INTEL_PT_TIME_MULT] = tc.time_mult;
    auxtrace_info->priv[INTEL_PT_TIME_ZERO] = tc.time_zero;
    auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO] = cap_user_time_zero;
    auxtrace_info->priv[INTEL_PT_TSC_BIT] = tsc_bit;
    auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT] = noretcomp_bit;
    auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch;
    auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode;
    auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps;
    auxtrace_info->priv[INTEL_PT_MTC_BIT] = mtc_bit;
    auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS] = mtc_freq_bits;
    auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n;
    auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d;
    auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit;
    auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO] = max_non_turbo_ratio;
    auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] = filter_str_len;

    info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;

    if (filter_str_len) {
        size_t len = intel_pt_filter_bytes(filter);

        strncpy((char *)info, filter, len);
        info += len >> 3;
    }

    *info++ = event_trace;

    return 0;
}

static int intel_pt_track_switches(struct evlist *evlist)
{
    const char *sched_switch = "sched:sched_switch";
    struct evsel *evsel;
    int err;

    if (!evlist__can_select_event(evlist, sched_switch))
        return -EPERM;

    err = parse_event(evlist, sched_switch);
    if (err) {
        pr_debug2("%s: failed to parse %s, error %d\n",
              __func__, sched_switch, err);
        return err;
    }

    evsel = evlist__last(evlist);

    evsel__set_sample_bit(evsel, CPU);
    evsel__set_sample_bit(evsel, TIME);

    evsel->core.system_wide = true;
    evsel->no_aux_samples = true;
    evsel->immediate = true;

    return 0;
}

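/*
 * Render the set bits of @valid as a human-readable list, collapsing runs
 * of three or more consecutive values into ranges: e.g. a @valid of 0x3d
 * (bits 0, 2, 3, 4 and 5) becomes "0,2-5", whereas bits 2 and 3 alone
 * become "2,3".
 */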
static void intel_pt_valid_str(char *str, size_t len, u64 valid)
{
    unsigned int val, last = 0, state = 1;
    int p = 0;

    str[0] = '\0';

    for (val = 0; val <= 64; val++, valid >>= 1) {
        if (valid & 1) {
            last = val;
            switch (state) {
            case 0:
                p += scnprintf(str + p, len - p, ",");
                /* Fall through */
            case 1:
                p += scnprintf(str + p, len - p, "%u", val);
                state = 2;
                break;
            case 2:
                state = 3;
                break;
            case 3:
                state = 4;
                break;
            default:
                break;
            }
        } else {
            switch (state) {
            case 3:
                p += scnprintf(str + p, len - p, ",%u", last);
                state = 0;
                break;
            case 4:
                p += scnprintf(str + p, len - p, "-%u", last);
                state = 0;
                break;
            default:
                break;
            }
            if (state != 1)
                state = 0;
        }
    }
}

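/*
 * Validate one config term (e.g. mtc_period) against the bitmask of values
 * the hardware advertises in @caps.  Zero is always accepted (valid |= 1),
 * and if the controlling capability named by @supported is absent, zero is
 * the only accepted value.
 */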
static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu,
                    const char *caps, const char *name,
                    const char *supported, u64 config)
{
    char valid_str[256];
    unsigned int shift;
    unsigned long long valid;
    u64 bits;
    int ok;

    if (perf_pmu__scan_file(intel_pt_pmu, caps, "%llx", &valid) != 1)
        valid = 0;

    if (supported &&
        perf_pmu__scan_file(intel_pt_pmu, supported, "%d", &ok) == 1 && !ok)
        valid = 0;

    valid |= 1;

    bits = perf_pmu__format_bits(&intel_pt_pmu->format, name);

    config &= bits;

    for (shift = 0; bits && !(bits & 1); shift++)
        bits >>= 1;

    config >>= shift;

    if (config > 63)
        goto out_err;

    if (valid & (1 << config))
        return 0;
out_err:
    intel_pt_valid_str(valid_str, sizeof(valid_str), valid);
    pr_err("Invalid %s for %s. Valid values are: %s\n",
           name, INTEL_PT_PMU_NAME, valid_str);
    return -EINVAL;
}

static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu,
                    struct evsel *evsel)
{
    int err;
    char c;

    if (!evsel)
        return 0;

    /*
     * If supported, force pass-through config term (pt=1) even if user
     * sets pt=0, which avoids senseless kernel errors.
     */
    if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 &&
        !(evsel->core.attr.config & 1)) {
        pr_warning("pt=0 doesn't make sense, forcing pt=1\n");
        evsel->core.attr.config |= 1;
    }

    err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds",
                       "cyc_thresh", "caps/psb_cyc",
                       evsel->core.attr.config);
    if (err)
        return err;

    err = intel_pt_val_config_term(intel_pt_pmu, "caps/mtc_periods",
                       "mtc_period", "caps/mtc",
                       evsel->core.attr.config);
    if (err)
        return err;

    return intel_pt_val_config_term(intel_pt_pmu, "caps/psb_periods",
                    "psb_period", "caps/psb_cyc",
                    evsel->core.attr.config);
}

static void intel_pt_config_sample_mode(struct perf_pmu *intel_pt_pmu,
                    struct evsel *evsel)
{
    u64 user_bits = 0, bits;
    struct evsel_config_term *term = evsel__get_config_term(evsel, CFG_CHG);

    if (term)
        user_bits = term->val.cfg_chg;

    bits = perf_pmu__format_bits(&intel_pt_pmu->format, "psb_period");

    /* Did the user change psb_period? */
    if (bits & user_bits)
        return;

    /* Set psb_period to 0 */
    evsel->core.attr.config &= ~bits;
}

static void intel_pt_min_max_sample_sz(struct evlist *evlist,
                       size_t *min_sz, size_t *max_sz)
{
    struct evsel *evsel;

    evlist__for_each_entry(evlist, evsel) {
        size_t sz = evsel->core.attr.aux_sample_size;

        if (!sz)
            continue;
        if (min_sz && (sz < *min_sz || !*min_sz))
            *min_sz = sz;
        if (max_sz && sz > *max_sz)
            *max_sz = sz;
    }
}

/*
 * Currently, there is not enough information to disambiguate different PEBS
 * events, so only allow one.
 */
static bool intel_pt_too_many_aux_output(struct evlist *evlist)
{
    struct evsel *evsel;
    int aux_output_cnt = 0;

    evlist__for_each_entry(evlist, evsel)
        aux_output_cnt += !!evsel->core.attr.aux_output;

    if (aux_output_cnt > 1) {
        pr_err(INTEL_PT_PMU_NAME " supports at most one event with aux-output\n");
        return true;
    }

    return false;
}

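/*
 * Fix up the record options for Intel PT: validate the event config, pick
 * default AUX area buffer sizes (4 MiB if privileged, otherwise 128 KiB),
 * enforce that the AUX buffer is a power of 2 of at least 4 KiB (snapshot
 * and sample modes) or 8 KiB (full trace), set an aux_watermark of a
 * quarter of the buffer for full trace, and add the sideband (sched_switch
 * or context_switch) and dummy tracking events needed for decoding.
 */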
static int intel_pt_recording_options(struct auxtrace_record *itr,
                      struct evlist *evlist,
                      struct record_opts *opts)
{
    struct intel_pt_recording *ptr =
            container_of(itr, struct intel_pt_recording, itr);
    struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
    bool have_timing_info, need_immediate = false;
    struct evsel *evsel, *intel_pt_evsel = NULL;
    const struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
    bool privileged = perf_event_paranoid_check(-1);
    u64 tsc_bit;
    int err;

    ptr->evlist = evlist;
    ptr->snapshot_mode = opts->auxtrace_snapshot_mode;

    evlist__for_each_entry(evlist, evsel) {
        if (evsel->core.attr.type == intel_pt_pmu->type) {
            if (intel_pt_evsel) {
                pr_err("There may be only one " INTEL_PT_PMU_NAME " event\n");
                return -EINVAL;
            }
            evsel->core.attr.freq = 0;
            evsel->core.attr.sample_period = 1;
            evsel->no_aux_samples = true;
            evsel->needs_auxtrace_mmap = true;
            intel_pt_evsel = evsel;
            opts->full_auxtrace = true;
        }
    }

    if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) {
        pr_err("Snapshot mode (-S option) requires " INTEL_PT_PMU_NAME " PMU event (-e " INTEL_PT_PMU_NAME ")\n");
        return -EINVAL;
    }

    if (opts->auxtrace_snapshot_mode && opts->auxtrace_sample_mode) {
        pr_err("Snapshot mode (" INTEL_PT_PMU_NAME " PMU) and sample trace cannot be used together\n");
        return -EINVAL;
    }

    if (opts->use_clockid) {
        pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n");
        return -EINVAL;
    }

    if (intel_pt_too_many_aux_output(evlist))
        return -EINVAL;

    if (!opts->full_auxtrace)
        return 0;

    if (opts->auxtrace_sample_mode)
        intel_pt_config_sample_mode(intel_pt_pmu, intel_pt_evsel);

    err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel);
    if (err)
        return err;

    /* Set default sizes for snapshot mode */
    if (opts->auxtrace_snapshot_mode) {
        size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist);

        if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
            if (privileged) {
                opts->auxtrace_mmap_pages = MiB(4) / page_size;
            } else {
                opts->auxtrace_mmap_pages = KiB(128) / page_size;
                if (opts->mmap_pages == UINT_MAX)
                    opts->mmap_pages = KiB(256) / page_size;
            }
        } else if (!opts->auxtrace_mmap_pages && !privileged &&
               opts->mmap_pages == UINT_MAX) {
            opts->mmap_pages = KiB(256) / page_size;
        }
        if (!opts->auxtrace_snapshot_size)
            opts->auxtrace_snapshot_size =
                opts->auxtrace_mmap_pages * (size_t)page_size;
        if (!opts->auxtrace_mmap_pages) {
            size_t sz = opts->auxtrace_snapshot_size;

            sz = round_up(sz, page_size) / page_size;
            opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
        }
        if (opts->auxtrace_snapshot_size >
                opts->auxtrace_mmap_pages * (size_t)page_size) {
            pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
                   opts->auxtrace_snapshot_size,
                   opts->auxtrace_mmap_pages * (size_t)page_size);
            return -EINVAL;
        }
        if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
            pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
            return -EINVAL;
        }
        pr_debug2("Intel PT snapshot size: %zu\n",
              opts->auxtrace_snapshot_size);
        if (psb_period &&
            opts->auxtrace_snapshot_size <= psb_period +
                          INTEL_PT_PSB_PERIOD_NEAR)
            ui__warning("Intel PT snapshot size (%zu) may be too small for PSB period (%zu)\n",
                    opts->auxtrace_snapshot_size, psb_period);
    }

    /* Set default sizes for sample mode */
    if (opts->auxtrace_sample_mode) {
        size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist);
        size_t min_sz = 0, max_sz = 0;

        intel_pt_min_max_sample_sz(evlist, &min_sz, &max_sz);
        if (!opts->auxtrace_mmap_pages && !privileged &&
            opts->mmap_pages == UINT_MAX)
            opts->mmap_pages = KiB(256) / page_size;
        if (!opts->auxtrace_mmap_pages) {
            size_t sz = round_up(max_sz, page_size) / page_size;

            opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
        }
        if (max_sz > opts->auxtrace_mmap_pages * (size_t)page_size) {
            pr_err("Sample size %zu must not be greater than AUX area tracing mmap size %zu\n",
                   max_sz,
                   opts->auxtrace_mmap_pages * (size_t)page_size);
            return -EINVAL;
        }
        pr_debug2("Intel PT min. sample size: %zu max. sample size: %zu\n",
              min_sz, max_sz);
        if (psb_period &&
            min_sz <= psb_period + INTEL_PT_PSB_PERIOD_NEAR)
            ui__warning("Intel PT sample size (%zu) may be too small for PSB period (%zu)\n",
                    min_sz, psb_period);
    }

    /* Set default sizes for full trace mode */
    if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
        if (privileged) {
            opts->auxtrace_mmap_pages = MiB(4) / page_size;
        } else {
            opts->auxtrace_mmap_pages = KiB(128) / page_size;
            if (opts->mmap_pages == UINT_MAX)
                opts->mmap_pages = KiB(256) / page_size;
        }
    }

    /* Validate auxtrace_mmap_pages */
    if (opts->auxtrace_mmap_pages) {
        size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
        size_t min_sz;

        if (opts->auxtrace_snapshot_mode || opts->auxtrace_sample_mode)
            min_sz = KiB(4);
        else
            min_sz = KiB(8);

        if (sz < min_sz || !is_power_of_2(sz)) {
            pr_err("Invalid mmap size for Intel Processor Trace: must be at least %zuKiB and a power of 2\n",
                   min_sz / 1024);
            return -EINVAL;
        }
    }

    if (!opts->auxtrace_snapshot_mode && !opts->auxtrace_sample_mode) {
        u32 aux_watermark = opts->auxtrace_mmap_pages * page_size / 4;

        intel_pt_evsel->core.attr.aux_watermark = aux_watermark;
    }

    intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
                 "tsc", &tsc_bit);

    if (opts->full_auxtrace && (intel_pt_evsel->core.attr.config & tsc_bit))
        have_timing_info = true;
    else
        have_timing_info = false;

    /*
     * Per-cpu recording needs sched_switch events to distinguish different
     * threads.
     */
    if (have_timing_info && !perf_cpu_map__empty(cpus) &&
        !record_opts__no_switch_events(opts)) {
        if (perf_can_record_switch_events()) {
            bool cpu_wide = !target__none(&opts->target) &&
                    !target__has_task(&opts->target);

            if (!cpu_wide && perf_can_record_cpu_wide()) {
                struct evsel *switch_evsel;

                switch_evsel = evlist__add_dummy_on_all_cpus(evlist);
                if (!switch_evsel)
                    return -ENOMEM;

                switch_evsel->core.attr.context_switch = 1;
                switch_evsel->immediate = true;

                evsel__set_sample_bit(switch_evsel, TID);
                evsel__set_sample_bit(switch_evsel, TIME);
                evsel__set_sample_bit(switch_evsel, CPU);
                evsel__reset_sample_bit(switch_evsel, BRANCH_STACK);

                opts->record_switch_events = false;
                ptr->have_sched_switch = 3;
            } else {
                opts->record_switch_events = true;
                need_immediate = true;
                if (cpu_wide)
                    ptr->have_sched_switch = 3;
                else
                    ptr->have_sched_switch = 2;
            }
        } else {
            err = intel_pt_track_switches(evlist);
            if (err == -EPERM)
                pr_debug2("Unable to select sched:sched_switch\n");
            else if (err)
                return err;
            else
                ptr->have_sched_switch = 1;
        }
    }

    if (have_timing_info && !intel_pt_evsel->core.attr.exclude_kernel &&
        perf_can_record_text_poke_events() && perf_can_record_cpu_wide())
        opts->text_poke = true;

    if (intel_pt_evsel) {
        /*
         * To obtain the auxtrace buffer file descriptor, the auxtrace
         * event must come first.
         */
        evlist__to_front(evlist, intel_pt_evsel);
        /*
         * In the case of per-cpu mmaps, we need the CPU on the
         * AUX event.
         */
        if (!perf_cpu_map__empty(cpus))
            evsel__set_sample_bit(intel_pt_evsel, CPU);
    }

    /* Add dummy event to keep tracking */
    if (opts->full_auxtrace) {
        bool need_system_wide_tracking;
        struct evsel *tracking_evsel;

        /*
         * User space tasks can migrate between CPUs, so when tracing
         * selected CPUs, sideband for all CPUs is still needed.
         */
        need_system_wide_tracking = evlist->core.has_user_cpus &&
                        !intel_pt_evsel->core.attr.exclude_user;

        tracking_evsel = evlist__add_aux_dummy(evlist, need_system_wide_tracking);
        if (!tracking_evsel)
            return -ENOMEM;

        evlist__set_tracking_event(evlist, tracking_evsel);

        if (need_immediate)
            tracking_evsel->immediate = true;

        /* In per-cpu case, always need the time of mmap events etc */
        if (!perf_cpu_map__empty(cpus)) {
            evsel__set_sample_bit(tracking_evsel, TIME);
            /* And the CPU for switch events */
            evsel__set_sample_bit(tracking_evsel, CPU);
        }
        evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
    }

    /*
     * Warn the user when we do not have enough information to decode, i.e.
     * per-cpu with no sched_switch (except workload-only).
     */
    if (!ptr->have_sched_switch && !perf_cpu_map__empty(cpus) &&
        !target__none(&opts->target) &&
        !intel_pt_evsel->core.attr.exclude_user)
        ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");

    return 0;
}

static int intel_pt_snapshot_start(struct auxtrace_record *itr)
{
    struct intel_pt_recording *ptr =
            container_of(itr, struct intel_pt_recording, itr);
    struct evsel *evsel;

    evlist__for_each_entry(ptr->evlist, evsel) {
        if (evsel->core.attr.type == ptr->intel_pt_pmu->type)
            return evsel__disable(evsel);
    }
    return -EINVAL;
}

static int intel_pt_snapshot_finish(struct auxtrace_record *itr)
{
    struct intel_pt_recording *ptr =
            container_of(itr, struct intel_pt_recording, itr);
    struct evsel *evsel;

    evlist__for_each_entry(ptr->evlist, evsel) {
        if (evsel->core.attr.type == ptr->intel_pt_pmu->type)
            return evsel__enable(evsel);
    }
    return -EINVAL;
}

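/*
 * Grow the snapshot refs array geometrically (16, 32, 64, ...) until it can
 * hold index @idx, copying the existing entries across.
 */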
static int intel_pt_alloc_snapshot_refs(struct intel_pt_recording *ptr, int idx)
{
    const size_t sz = sizeof(struct intel_pt_snapshot_ref);
    int cnt = ptr->snapshot_ref_cnt, new_cnt = cnt * 2;
    struct intel_pt_snapshot_ref *refs;

    if (!new_cnt)
        new_cnt = 16;

    while (new_cnt <= idx)
        new_cnt *= 2;

    refs = calloc(new_cnt, sz);
    if (!refs)
        return -ENOMEM;

    memcpy(refs, ptr->snapshot_refs, cnt * sz);

    ptr->snapshot_refs = refs;
    ptr->snapshot_ref_cnt = new_cnt;

    return 0;
}

static void intel_pt_free_snapshot_refs(struct intel_pt_recording *ptr)
{
    int i;

    for (i = 0; i < ptr->snapshot_ref_cnt; i++)
        zfree(&ptr->snapshot_refs[i].ref_buf);
    zfree(&ptr->snapshot_refs);
}

static void intel_pt_recording_free(struct auxtrace_record *itr)
{
    struct intel_pt_recording *ptr =
            container_of(itr, struct intel_pt_recording, itr);

    intel_pt_free_snapshot_refs(ptr);
    free(ptr);
}

static int intel_pt_alloc_snapshot_ref(struct intel_pt_recording *ptr, int idx,
                       size_t snapshot_buf_size)
{
    size_t ref_buf_size = ptr->snapshot_ref_buf_size;
    void *ref_buf;

    ref_buf = zalloc(ref_buf_size);
    if (!ref_buf)
        return -ENOMEM;

    ptr->snapshot_refs[idx].ref_buf = ref_buf;
    ptr->snapshot_refs[idx].ref_offset = snapshot_buf_size - ref_buf_size;

    return 0;
}

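/*
 * Size the reference buffer used to detect buffer wrap-around: twice the
 * PSB period, capped at 256 KiB.  Returns 0 (no reference buffer) if the
 * snapshot is 64 KiB or less, or if the reference would not be
 * substantially smaller than the AUX buffer or the snapshot itself.
 */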
static size_t intel_pt_snapshot_ref_buf_size(struct intel_pt_recording *ptr,
                         size_t snapshot_buf_size)
{
    const size_t max_size = 256 * 1024;
    size_t buf_size = 0, psb_period;

    if (ptr->snapshot_size <= 64 * 1024)
        return 0;

    psb_period = intel_pt_psb_period(ptr->intel_pt_pmu, ptr->evlist);
    if (psb_period)
        buf_size = psb_period * 2;

    if (!buf_size || buf_size > max_size)
        buf_size = max_size;

    if (buf_size >= snapshot_buf_size)
        return 0;

    if (buf_size >= ptr->snapshot_size / 2)
        return 0;

    return buf_size;
}

static int intel_pt_snapshot_init(struct intel_pt_recording *ptr,
                  size_t snapshot_buf_size)
{
    if (ptr->snapshot_init_done)
        return 0;

    ptr->snapshot_init_done = true;

    ptr->snapshot_ref_buf_size = intel_pt_snapshot_ref_buf_size(ptr,
                            snapshot_buf_size);

    return 0;
}

/**
 * intel_pt_compare_buffers - compare bytes in a buffer to a circular buffer.
 * @buf1: first buffer
 * @compare_size: number of bytes to compare
 * @buf2: second buffer (a circular buffer)
 * @offs2: offset in second buffer
 * @buf2_size: size of second buffer
 *
 * The comparison allows for the possibility that the bytes to compare in the
 * circular buffer are not contiguous.  It is assumed that @compare_size <=
 * @buf2_size.  This function returns %false if the bytes are identical, %true
 * otherwise.
 */
static bool intel_pt_compare_buffers(void *buf1, size_t compare_size,
                     void *buf2, size_t offs2, size_t buf2_size)
{
    size_t end2 = offs2 + compare_size, part_size;

    if (end2 <= buf2_size)
        return memcmp(buf1, buf2 + offs2, compare_size);

    part_size = end2 - buf2_size;
    if (memcmp(buf1, buf2 + offs2, part_size))
        return true;

    compare_size -= part_size;

    return memcmp(buf1 + part_size, buf2, compare_size);
}

static bool intel_pt_compare_ref(void *ref_buf, size_t ref_offset,
                 size_t ref_size, size_t buf_size,
                 void *data, size_t head)
{
    size_t ref_end = ref_offset + ref_size;

    if (ref_end > buf_size) {
        if (head > ref_offset || head < ref_end - buf_size)
            return true;
    } else if (head > ref_offset && head < ref_end) {
        return true;
    }

    return intel_pt_compare_buffers(ref_buf, ref_size, data, ref_offset,
                    buf_size);
}

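/*
 * Save a reference copy of the @ref_size bytes that precede @head in the
 * circular buffer @data, wrapping to the end of the buffer when @head is
 * less than @ref_size.
 */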
static void intel_pt_copy_ref(void *ref_buf, size_t ref_size, size_t buf_size,
                  void *data, size_t head)
{
    if (head >= ref_size) {
        memcpy(ref_buf, data + head - ref_size, ref_size);
    } else {
        memcpy(ref_buf, data, head);
        ref_size -= head;
        memcpy(ref_buf + head, data + buf_size - ref_size, ref_size);
    }
}

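/*
 * Decide whether the AUX buffer wrapped since the last snapshot by checking
 * whether the saved reference bytes have been overwritten, then refresh the
 * reference copy ready for the next snapshot.
 */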
static bool intel_pt_wrapped(struct intel_pt_recording *ptr, int idx,
                 struct auxtrace_mmap *mm, unsigned char *data,
                 u64 head)
{
    struct intel_pt_snapshot_ref *ref = &ptr->snapshot_refs[idx];
    bool wrapped;

    wrapped = intel_pt_compare_ref(ref->ref_buf, ref->ref_offset,
                       ptr->snapshot_ref_buf_size, mm->len,
                       data, head);

    intel_pt_copy_ref(ref->ref_buf, ptr->snapshot_ref_buf_size, mm->len,
              data, head);

    return wrapped;
}

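/*
 * Heuristic for when there is no reference buffer: if any of the last 512
 * u64 words (4 KiB) of the buffer are non-zero, the buffer has wrapped at
 * least once since it started out zero-filled.
 */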
static bool intel_pt_first_wrap(u64 *data, size_t buf_size)
{
    int i, a, b;

    b = buf_size >> 3;
    a = b - 512;
    if (a < 0)
        a = 0;

    for (i = a; i < b; i++) {
        if (data[i])
            return true;
    }

    return false;
}

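/*
 * Called when a snapshot is taken: work out whether the buffer wrapped and
 * adjust *old and *head so that, as in full-trace mode, *old is always less
 * than *head and the difference covers the data to copy out.
 */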
static int intel_pt_find_snapshot(struct auxtrace_record *itr, int idx,
                  struct auxtrace_mmap *mm, unsigned char *data,
                  u64 *head, u64 *old)
{
    struct intel_pt_recording *ptr =
            container_of(itr, struct intel_pt_recording, itr);
    bool wrapped;
    int err;

    pr_debug3("%s: mmap index %d old head %zu new head %zu\n",
          __func__, idx, (size_t)*old, (size_t)*head);

    err = intel_pt_snapshot_init(ptr, mm->len);
    if (err)
        goto out_err;

    if (idx >= ptr->snapshot_ref_cnt) {
        err = intel_pt_alloc_snapshot_refs(ptr, idx);
        if (err)
            goto out_err;
    }

    if (ptr->snapshot_ref_buf_size) {
        if (!ptr->snapshot_refs[idx].ref_buf) {
            err = intel_pt_alloc_snapshot_ref(ptr, idx, mm->len);
            if (err)
                goto out_err;
        }
        wrapped = intel_pt_wrapped(ptr, idx, mm, data, *head);
    } else {
        wrapped = ptr->snapshot_refs[idx].wrapped;
        if (!wrapped && intel_pt_first_wrap((u64 *)data, mm->len)) {
            ptr->snapshot_refs[idx].wrapped = true;
            wrapped = true;
        }
    }

    /*
     * In full trace mode 'head' continually increases.  However in snapshot
     * mode 'head' is an offset within the buffer.  Here 'old' and 'head'
     * are adjusted to match the full trace case which expects that 'old' is
     * always less than 'head'.
     */
    if (wrapped) {
        *old = *head;
        *head += mm->len;
    } else {
        if (mm->mask)
            *old &= mm->mask;
        else
            *old %= mm->len;
        if (*old > *head)
            *head += mm->len;
    }

    pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n",
          __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head);

    return 0;

out_err:
    pr_err("%s: failed, error %d\n", __func__, err);
    return err;
}

static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused)
{
    return rdtsc();
}

struct auxtrace_record *intel_pt_recording_init(int *err)
{
    struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
    struct intel_pt_recording *ptr;

    if (!intel_pt_pmu)
        return NULL;

    if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) {
        *err = -errno;
        return NULL;
    }

    ptr = zalloc(sizeof(struct intel_pt_recording));
    if (!ptr) {
        *err = -ENOMEM;
        return NULL;
    }

    ptr->intel_pt_pmu = intel_pt_pmu;
    ptr->itr.pmu = intel_pt_pmu;
    ptr->itr.recording_options = intel_pt_recording_options;
    ptr->itr.info_priv_size = intel_pt_info_priv_size;
    ptr->itr.info_fill = intel_pt_info_fill;
    ptr->itr.free = intel_pt_recording_free;
    ptr->itr.snapshot_start = intel_pt_snapshot_start;
    ptr->itr.snapshot_finish = intel_pt_snapshot_finish;
    ptr->itr.find_snapshot = intel_pt_find_snapshot;
    ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options;
    ptr->itr.reference = intel_pt_reference;
    ptr->itr.read_finish = auxtrace_record__read_finish;
    /*
     * Decoding starts at a PSB packet. Minimum PSB period is 2K so 4K
     * should give at least 1 PSB per sample.
     */
    ptr->itr.default_aux_sample_size = 4096;
    return &ptr->itr;
}