Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * Arm Statistical Profiling Extensions (SPE) support
0004  * Copyright (c) 2017-2018, Arm Ltd.
0005  */
0006 
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include <linux/zalloc.h>
#include <errno.h>
#include <time.h>

#include "../../../util/cpumap.h"
#include "../../../util/event.h"
#include "../../../util/evsel.h"
#include "../../../util/evsel_config.h"
#include "../../../util/evlist.h"
#include "../../../util/session.h"
#include <internal/lib.h> // page_size
#include "../../../util/pmu.h"
#include "../../../util/debug.h"
#include "../../../util/auxtrace.h"
#include "../../../util/record.h"
#include "../../../util/arm-spe.h"
#include <tools/libc_compat.h> // reallocarray
0027 
/* Byte-count helpers used for AUX buffer and snapshot sizing below. */
#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
0030 
/*
 * Per-session state for the Arm SPE recording backend.
 */
struct arm_spe_recording {
    struct auxtrace_record      itr;    /* embedded so container_of() recovers this struct from the itr callbacks */
    struct perf_pmu         *arm_spe_pmu;   /* the SPE PMU being recorded */
    struct evlist       *evlist;    /* evlist of the current session (set in recording_options) */
    int         wrapped_cnt;    /* number of valid entries in 'wrapped' */
    bool            *wrapped;   /* per-mmap-index flag: AUX head has wrapped at least once */
};
0038 
0039 static void arm_spe_set_timestamp(struct auxtrace_record *itr,
0040                   struct evsel *evsel)
0041 {
0042     struct arm_spe_recording *ptr;
0043     struct perf_pmu *arm_spe_pmu;
0044     struct evsel_config_term *term = evsel__get_config_term(evsel, CFG_CHG);
0045     u64 user_bits = 0, bit;
0046 
0047     ptr = container_of(itr, struct arm_spe_recording, itr);
0048     arm_spe_pmu = ptr->arm_spe_pmu;
0049 
0050     if (term)
0051         user_bits = term->val.cfg_chg;
0052 
0053     bit = perf_pmu__format_bits(&arm_spe_pmu->format, "ts_enable");
0054 
0055     /* Skip if user has set it */
0056     if (bit & user_bits)
0057         return;
0058 
0059     evsel->core.attr.config |= bit;
0060 }
0061 
0062 static size_t
0063 arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused,
0064                struct evlist *evlist __maybe_unused)
0065 {
0066     return ARM_SPE_AUXTRACE_PRIV_SIZE;
0067 }
0068 
0069 static int arm_spe_info_fill(struct auxtrace_record *itr,
0070                  struct perf_session *session,
0071                  struct perf_record_auxtrace_info *auxtrace_info,
0072                  size_t priv_size)
0073 {
0074     struct arm_spe_recording *sper =
0075             container_of(itr, struct arm_spe_recording, itr);
0076     struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
0077 
0078     if (priv_size != ARM_SPE_AUXTRACE_PRIV_SIZE)
0079         return -EINVAL;
0080 
0081     if (!session->evlist->core.nr_mmaps)
0082         return -EINVAL;
0083 
0084     auxtrace_info->type = PERF_AUXTRACE_ARM_SPE;
0085     auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type;
0086 
0087     return 0;
0088 }
0089 
0090 static void
0091 arm_spe_snapshot_resolve_auxtrace_defaults(struct record_opts *opts,
0092                        bool privileged)
0093 {
0094     /*
0095      * The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size nor
0096      * snapshot size is specified, then the default is 4MiB for privileged users, 128KiB for
0097      * unprivileged users.
0098      *
0099      * The default auxtrace mmap size is 4MiB/page_size for privileged users, 128KiB for
0100      * unprivileged users. If an unprivileged user does not specify mmap pages, the mmap pages
0101      * will be reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the
0102      * user is likely to get an error as they exceed their mlock limmit.
0103      */
0104 
0105     /*
0106      * No size were given to '-S' or '-m,', so go with the default
0107      */
0108     if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
0109         if (privileged) {
0110             opts->auxtrace_mmap_pages = MiB(4) / page_size;
0111         } else {
0112             opts->auxtrace_mmap_pages = KiB(128) / page_size;
0113             if (opts->mmap_pages == UINT_MAX)
0114                 opts->mmap_pages = KiB(256) / page_size;
0115         }
0116     } else if (!opts->auxtrace_mmap_pages && !privileged && opts->mmap_pages == UINT_MAX) {
0117         opts->mmap_pages = KiB(256) / page_size;
0118     }
0119 
0120     /*
0121      * '-m,xyz' was specified but no snapshot size, so make the snapshot size as big as the
0122      * auxtrace mmap area.
0123      */
0124     if (!opts->auxtrace_snapshot_size)
0125         opts->auxtrace_snapshot_size = opts->auxtrace_mmap_pages * (size_t)page_size;
0126 
0127     /*
0128      * '-Sxyz' was specified but no auxtrace mmap area, so make the auxtrace mmap area big
0129      * enough to fit the requested snapshot size.
0130      */
0131     if (!opts->auxtrace_mmap_pages) {
0132         size_t sz = opts->auxtrace_snapshot_size;
0133 
0134         sz = round_up(sz, page_size) / page_size;
0135         opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
0136     }
0137 }
0138 
/*
 * Set up record options for an SPE session:
 *  - allow at most one SPE event; switch it to a fixed sample period and
 *    mark it as needing an auxtrace mmap;
 *  - resolve snapshot-mode defaults and validate the AUX mmap size
 *    (power of two, at least 8KiB);
 *  - move the SPE event to the front of the evlist, and for per-CPU mmaps
 *    enable CPU sampling and timestamps on it;
 *  - append a software dummy event used to capture sideband records
 *    (mmap/comm/context-switch) alongside the trace.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
static int arm_spe_recording_options(struct auxtrace_record *itr,
                     struct evlist *evlist,
                     struct record_opts *opts)
{
    struct arm_spe_recording *sper =
            container_of(itr, struct arm_spe_recording, itr);
    struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
    struct evsel *evsel, *arm_spe_evsel = NULL;
    struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
    bool privileged = perf_event_paranoid_check(-1);
    struct evsel *tracking_evsel;
    int err;
    u64 bit;

    sper->evlist = evlist;

    /* Find the (single) SPE event and configure it for AUX tracing. */
    evlist__for_each_entry(evlist, evsel) {
        if (evsel->core.attr.type == arm_spe_pmu->type) {
            if (arm_spe_evsel) {
                pr_err("There may be only one " ARM_SPE_PMU_NAME "x event\n");
                return -EINVAL;
            }
            /* SPE samples by period, not frequency. */
            evsel->core.attr.freq = 0;
            evsel->core.attr.sample_period = arm_spe_pmu->default_config->sample_period;
            evsel->needs_auxtrace_mmap = true;
            arm_spe_evsel = evsel;
            opts->full_auxtrace = true;
        }
    }

    /* No SPE event in the evlist - nothing more to do. */
    if (!opts->full_auxtrace)
        return 0;

    /*
     * we are in snapshot mode.
     */
    if (opts->auxtrace_snapshot_mode) {
        /*
         * Command arguments '-Sxyz' and/or '-m,xyz' are missing, so fill those in with
         * default values.
         */
        if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages)
            arm_spe_snapshot_resolve_auxtrace_defaults(opts, privileged);

        /*
         * Snapshot size can't be bigger than the auxtrace area.
         */
        if (opts->auxtrace_snapshot_size > opts->auxtrace_mmap_pages * (size_t)page_size) {
            pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
                   opts->auxtrace_snapshot_size,
                   opts->auxtrace_mmap_pages * (size_t)page_size);
            return -EINVAL;
        }

        /*
         * Something went wrong somewhere - this shouldn't happen.
         */
        if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
            pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
            return -EINVAL;
        }
    }

    /* We are in full trace mode but '-m,xyz' wasn't specified */
    if (!opts->auxtrace_mmap_pages) {
        if (privileged) {
            opts->auxtrace_mmap_pages = MiB(4) / page_size;
        } else {
            opts->auxtrace_mmap_pages = KiB(128) / page_size;
            if (opts->mmap_pages == UINT_MAX)
                opts->mmap_pages = KiB(256) / page_size;
        }
    }

    /* Validate auxtrace_mmap_pages */
    if (opts->auxtrace_mmap_pages) {
        size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
        size_t min_sz = KiB(8);

        if (sz < min_sz || !is_power_of_2(sz)) {
            pr_err("Invalid mmap size for ARM SPE: must be at least %zuKiB and a power of 2\n",
                   min_sz / 1024);
            return -EINVAL;
        }
    }

    if (opts->auxtrace_snapshot_mode)
        pr_debug2("%sx snapshot size: %zu\n", ARM_SPE_PMU_NAME,
              opts->auxtrace_snapshot_size);

    /*
     * To obtain the auxtrace buffer file descriptor, the auxtrace event
     * must come first.
     */
    evlist__to_front(evlist, arm_spe_evsel);

    /*
     * In the case of per-cpu mmaps, sample CPU for AUX event;
     * also enable the timestamp tracing for samples correlation.
     */
    if (!perf_cpu_map__empty(cpus)) {
        evsel__set_sample_bit(arm_spe_evsel, CPU);
        arm_spe_set_timestamp(itr, arm_spe_evsel);
    }

    /*
     * Set this only so that perf report knows that SPE generates memory info. It has no effect
     * on the opening of the event or the SPE data produced.
     */
    evsel__set_sample_bit(arm_spe_evsel, DATA_SRC);

    /*
     * The PHYS_ADDR flag does not affect the driver behaviour, it is used to
     * inform that the resulting output's SPE samples contain physical addresses
     * where applicable.
     */
    bit = perf_pmu__format_bits(&arm_spe_pmu->format, "pa_enable");
    if (arm_spe_evsel->core.attr.config & bit)
        evsel__set_sample_bit(arm_spe_evsel, PHYS_ADDR);

    /* Add dummy event to keep tracking */
    err = parse_event(evlist, "dummy:u");
    if (err)
        return err;

    tracking_evsel = evlist__last(evlist);
    evlist__set_tracking_event(evlist, tracking_evsel);

    /* Dummy event: one "sample" per occurrence, no frequency throttling. */
    tracking_evsel->core.attr.freq = 0;
    tracking_evsel->core.attr.sample_period = 1;

    /* In per-cpu case, always need the time of mmap events etc */
    if (!perf_cpu_map__empty(cpus)) {
        evsel__set_sample_bit(tracking_evsel, TIME);
        evsel__set_sample_bit(tracking_evsel, CPU);

        /* also track task context switch */
        if (!record_opts__no_switch_events(opts))
            tracking_evsel->core.attr.context_switch = 1;
    }

    return 0;
}
0282 
0283 static int arm_spe_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused,
0284                      struct record_opts *opts,
0285                      const char *str)
0286 {
0287     unsigned long long snapshot_size = 0;
0288     char *endptr;
0289 
0290     if (str) {
0291         snapshot_size = strtoull(str, &endptr, 0);
0292         if (*endptr || snapshot_size > SIZE_MAX)
0293             return -1;
0294     }
0295 
0296     opts->auxtrace_snapshot_mode = true;
0297     opts->auxtrace_snapshot_size = snapshot_size;
0298 
0299     return 0;
0300 }
0301 
0302 static int arm_spe_snapshot_start(struct auxtrace_record *itr)
0303 {
0304     struct arm_spe_recording *ptr =
0305             container_of(itr, struct arm_spe_recording, itr);
0306     struct evsel *evsel;
0307 
0308     evlist__for_each_entry(ptr->evlist, evsel) {
0309         if (evsel->core.attr.type == ptr->arm_spe_pmu->type)
0310             return evsel__disable(evsel);
0311     }
0312     return -EINVAL;
0313 }
0314 
0315 static int arm_spe_snapshot_finish(struct auxtrace_record *itr)
0316 {
0317     struct arm_spe_recording *ptr =
0318             container_of(itr, struct arm_spe_recording, itr);
0319     struct evsel *evsel;
0320 
0321     evlist__for_each_entry(ptr->evlist, evsel) {
0322         if (evsel->core.attr.type == ptr->arm_spe_pmu->type)
0323             return evsel__enable(evsel);
0324     }
0325     return -EINVAL;
0326 }
0327 
0328 static int arm_spe_alloc_wrapped_array(struct arm_spe_recording *ptr, int idx)
0329 {
0330     bool *wrapped;
0331     int cnt = ptr->wrapped_cnt, new_cnt, i;
0332 
0333     /*
0334      * No need to allocate, so return early.
0335      */
0336     if (idx < cnt)
0337         return 0;
0338 
0339     /*
0340      * Make ptr->wrapped as big as idx.
0341      */
0342     new_cnt = idx + 1;
0343 
0344     /*
0345      * Free'ed in arm_spe_recording_free().
0346      */
0347     wrapped = reallocarray(ptr->wrapped, new_cnt, sizeof(bool));
0348     if (!wrapped)
0349         return -ENOMEM;
0350 
0351     /*
0352      * init new allocated values.
0353      */
0354     for (i = cnt; i < new_cnt; i++)
0355         wrapped[i] = false;
0356 
0357     ptr->wrapped_cnt = new_cnt;
0358     ptr->wrapped = wrapped;
0359 
0360     return 0;
0361 }
0362 
0363 static bool arm_spe_buffer_has_wrapped(unsigned char *buffer,
0364                       size_t buffer_size, u64 head)
0365 {
0366     u64 i, watermark;
0367     u64 *buf = (u64 *)buffer;
0368     size_t buf_size = buffer_size;
0369 
0370     /*
0371      * Defensively handle the case where head might be continually increasing - if its value is
0372      * equal or greater than the size of the ring buffer, then we can safely determine it has
0373      * wrapped around. Otherwise, continue to detect if head might have wrapped.
0374      */
0375     if (head >= buffer_size)
0376         return true;
0377 
0378     /*
0379      * We want to look the very last 512 byte (chosen arbitrarily) in the ring buffer.
0380      */
0381     watermark = buf_size - 512;
0382 
0383     /*
0384      * The value of head is somewhere within the size of the ring buffer. This can be that there
0385      * hasn't been enough data to fill the ring buffer yet or the trace time was so long that
0386      * head has numerically wrapped around.  To find we need to check if we have data at the
0387      * very end of the ring buffer.  We can reliably do this because mmap'ed pages are zeroed
0388      * out and there is a fresh mapping with every new session.
0389      */
0390 
0391     /*
0392      * head is less than 512 byte from the end of the ring buffer.
0393      */
0394     if (head > watermark)
0395         watermark = head;
0396 
0397     /*
0398      * Speed things up by using 64 bit transactions (see "u64 *buf" above)
0399      */
0400     watermark /= sizeof(u64);
0401     buf_size /= sizeof(u64);
0402 
0403     /*
0404      * If we find trace data at the end of the ring buffer, head has been there and has
0405      * numerically wrapped around at least once.
0406      */
0407     for (i = watermark; i < buf_size; i++)
0408         if (buf[i])
0409             return true;
0410 
0411     return false;
0412 }
0413 
/*
 * Work out the window of AUX data ([*old, *head]) to copy out for a
 * snapshot of mmap index 'idx', adjusting *head and *old in place.
 *
 * Returns 0 on success, a negative error code on allocation failure.
 */
static int arm_spe_find_snapshot(struct auxtrace_record *itr, int idx,
                  struct auxtrace_mmap *mm, unsigned char *data,
                  u64 *head, u64 *old)
{
    int err;
    bool wrapped;
    struct arm_spe_recording *ptr =
            container_of(itr, struct arm_spe_recording, itr);

    /*
     * Allocate memory to keep track of wrapping if this is the first
     * time we deal with this *mm.
     */
    if (idx >= ptr->wrapped_cnt) {
        err = arm_spe_alloc_wrapped_array(ptr, idx);
        if (err)
            return err;
    }

    /*
     * Check to see if *head has wrapped around.  If it hasn't only the
     * amount of data between *head and *old is snapshot'ed to avoid
     * bloating the perf.data file with zeros.  But as soon as *head has
     * wrapped around the entire size of the AUX ring buffer is taken.
     */
    wrapped = ptr->wrapped[idx];
    if (!wrapped && arm_spe_buffer_has_wrapped(data, mm->len, *head)) {
        /* Wrapping is sticky: once set it stays set for this mmap. */
        wrapped = true;
        ptr->wrapped[idx] = true;
    }

    pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
          __func__, idx, (size_t)*old, (size_t)*head, mm->len);

    /*
     * No wrap has occurred, we can just use *head and *old.
     */
    if (!wrapped)
        return 0;

    /*
     * *head has wrapped around - adjust *head and *old to pickup the
     * entire content of the AUX buffer.
     */
    if (*head >= mm->len) {
        *old = *head - mm->len;
    } else {
        /* Keep *old exactly one buffer length behind *head. */
        *head += mm->len;
        *old = *head - mm->len;
    }

    return 0;
}
0467 
0468 static u64 arm_spe_reference(struct auxtrace_record *itr __maybe_unused)
0469 {
0470     struct timespec ts;
0471 
0472     clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
0473 
0474     return ts.tv_sec ^ ts.tv_nsec;
0475 }
0476 
0477 static void arm_spe_recording_free(struct auxtrace_record *itr)
0478 {
0479     struct arm_spe_recording *sper =
0480             container_of(itr, struct arm_spe_recording, itr);
0481 
0482     free(sper->wrapped);
0483     free(sper);
0484 }
0485 
0486 struct auxtrace_record *arm_spe_recording_init(int *err,
0487                            struct perf_pmu *arm_spe_pmu)
0488 {
0489     struct arm_spe_recording *sper;
0490 
0491     if (!arm_spe_pmu) {
0492         *err = -ENODEV;
0493         return NULL;
0494     }
0495 
0496     sper = zalloc(sizeof(struct arm_spe_recording));
0497     if (!sper) {
0498         *err = -ENOMEM;
0499         return NULL;
0500     }
0501 
0502     sper->arm_spe_pmu = arm_spe_pmu;
0503     sper->itr.pmu = arm_spe_pmu;
0504     sper->itr.snapshot_start = arm_spe_snapshot_start;
0505     sper->itr.snapshot_finish = arm_spe_snapshot_finish;
0506     sper->itr.find_snapshot = arm_spe_find_snapshot;
0507     sper->itr.parse_snapshot_options = arm_spe_parse_snapshot_options;
0508     sper->itr.recording_options = arm_spe_recording_options;
0509     sper->itr.info_priv_size = arm_spe_info_priv_size;
0510     sper->itr.info_fill = arm_spe_info_fill;
0511     sper->itr.free = arm_spe_recording_free;
0512     sper->itr.reference = arm_spe_reference;
0513     sper->itr.read_finish = auxtrace_record__read_finish;
0514     sper->itr.alignment = 0;
0515 
0516     *err = 0;
0517     return &sper->itr;
0518 }
0519 
0520 struct perf_event_attr
0521 *arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu)
0522 {
0523     struct perf_event_attr *attr;
0524 
0525     attr = zalloc(sizeof(struct perf_event_attr));
0526     if (!attr) {
0527         pr_err("arm_spe default config cannot allocate a perf_event_attr\n");
0528         return NULL;
0529     }
0530 
0531     /*
0532      * If kernel driver doesn't advertise a minimum,
0533      * use max allowable by PMSIDR_EL1.INTERVAL
0534      */
0535     if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu",
0536                   &attr->sample_period) != 1) {
0537         pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n");
0538         attr->sample_period = 4096;
0539     }
0540 
0541     arm_spe_pmu->selectable = true;
0542     arm_spe_pmu->is_uncore = false;
0543 
0544     return attr;
0545 }