#include <errno.h>
#include <linux/err.h>
#include <inttypes.h>
#include <math.h>
#include <string.h>
#include "counts.h"
#include "cpumap.h"
#include "debug.h"
#include "header.h"
#include "stat.h"
#include "session.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "thread_map.h"
#include "hashmap.h"
#include <linux/zalloc.h>

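/*
 * Running statistics, updated online with Welford's algorithm: each new
 * value updates n, the running mean and M2 (the sum of squared distances
 * from the current mean) in O(1) without storing the samples; min and
 * max are tracked alongside.
 */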
void update_stats(struct stats *stats, u64 val)
{
	double delta;

	stats->n++;
	delta = val - stats->mean;
	stats->mean += delta / stats->n;
	stats->M2 += delta * (val - stats->mean);

	if (val > stats->max)
		stats->max = val;

	if (val < stats->min)
		stats->min = val;
}

double avg_stats(struct stats *stats)
{
	return stats->mean;
}

/*
 * We report the standard deviation of the mean of the runs, not of
 * the individual runs:
 *
 *   s^2    = M2 / (n - 1)	(sample variance)
 *   s_mean = sqrt(s^2 / n)	(std dev of the mean)
 *
 * See http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
 */
double stddev_stats(struct stats *stats)
{
	double variance, variance_mean;

	if (stats->n < 2)
		return 0.0;

	variance = stats->M2 / (stats->n - 1);
	variance_mean = variance / stats->n;

	return sqrt(variance_mean);
}

double rel_stddev_stats(double stddev, double avg)
{
	double pct = 0.0;

	if (avg)
		pct = 100.0 * stddev / avg;

	return pct;
}

bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id)
{
	struct perf_stat_evsel *ps = evsel->stats;

	return ps->id == id;
}

#define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name
static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
	ID(NONE, x),
	ID(CYCLES_IN_TX, cpu/cycles-t/),
	ID(TRANSACTION_START, cpu/tx-start/),
	ID(ELISION_START, cpu/el-start/),
	ID(CYCLES_IN_TX_CP, cpu/cycles-ct/),
	ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots),
	ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued),
	ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
	ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
	ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
	ID(TOPDOWN_RETIRING, topdown-retiring),
	ID(TOPDOWN_BAD_SPEC, topdown-bad-spec),
	ID(TOPDOWN_FE_BOUND, topdown-fe-bound),
	ID(TOPDOWN_BE_BOUND, topdown-be-bound),
	ID(TOPDOWN_HEAVY_OPS, topdown-heavy-ops),
	ID(TOPDOWN_BR_MISPREDICT, topdown-br-mispredict),
	ID(TOPDOWN_FETCH_LAT, topdown-fetch-lat),
	ID(TOPDOWN_MEM_BOUND, topdown-mem-bound),
	ID(SMI_NUM, msr/smi/),
	ID(APERF, msr/aperf/),
};
#undef ID

static void perf_stat_evsel_id_init(struct evsel *evsel)
{
	struct perf_stat_evsel *ps = evsel->stats;
	int i;

	/* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */

	for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) {
		if (!strcmp(evsel__name(evsel), id_str[i]) ||
		    (strstr(evsel__name(evsel), id_str[i]) && evsel->pmu_name
		     && strstr(evsel__name(evsel), evsel->pmu_name))) {
			ps->id = i;
			break;
		}
	}
}

static void evsel__reset_stat_priv(struct evsel *evsel)
{
	int i;
	struct perf_stat_evsel *ps = evsel->stats;

	for (i = 0; i < 3; i++)
		init_stats(&ps->res_stats[i]);

	perf_stat_evsel_id_init(evsel);
}

static int evsel__alloc_stat_priv(struct evsel *evsel)
{
	evsel->stats = zalloc(sizeof(struct perf_stat_evsel));
	if (evsel->stats == NULL)
		return -ENOMEM;
	evsel__reset_stat_priv(evsel);
	return 0;
}

static void evsel__free_stat_priv(struct evsel *evsel)
{
	struct perf_stat_evsel *ps = evsel->stats;

	if (ps)
		zfree(&ps->group_data);
	zfree(&evsel->stats);
}

static int evsel__alloc_prev_raw_counts(struct evsel *evsel)
{
	int cpu_map_nr = evsel__nr_cpus(evsel);
	int nthreads = perf_thread_map__nr(evsel->core.threads);
	struct perf_counts *counts;

	counts = perf_counts__new(cpu_map_nr, nthreads);
	if (counts)
		evsel->prev_raw_counts = counts;

	return counts ? 0 : -ENOMEM;
}

static void evsel__free_prev_raw_counts(struct evsel *evsel)
{
	perf_counts__delete(evsel->prev_raw_counts);
	evsel->prev_raw_counts = NULL;
}

static void evsel__reset_prev_raw_counts(struct evsel *evsel)
{
	if (evsel->prev_raw_counts)
		perf_counts__reset(evsel->prev_raw_counts);
}

static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
{
	if (evsel__alloc_stat_priv(evsel) < 0 ||
	    evsel__alloc_counts(evsel) < 0 ||
	    (alloc_raw && evsel__alloc_prev_raw_counts(evsel) < 0))
		return -ENOMEM;

	return 0;
}

int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel__alloc_stats(evsel, alloc_raw))
			goto out_free;
	}

	return 0;

out_free:
	evlist__free_stats(evlist);
	return -1;
}

void evlist__free_stats(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		evsel__free_stat_priv(evsel);
		evsel__free_counts(evsel);
		evsel__free_prev_raw_counts(evsel);
	}
}

void evlist__reset_stats(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		evsel__reset_stat_priv(evsel);
		evsel__reset_counts(evsel);
	}
}

void evlist__reset_prev_raw_counts(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		evsel__reset_prev_raw_counts(evsel);
}

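/*
 * Copy the counts saved in prev_raw_counts back into evsel->counts
 * (including the aggregate), so that overall statistics can be computed
 * from the saved values, e.g. when summarizing interval mode runs.
 */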
static void evsel__copy_prev_raw_counts(struct evsel *evsel)
{
	int idx, nthreads = perf_thread_map__nr(evsel->core.threads);

	for (int thread = 0; thread < nthreads; thread++) {
		perf_cpu_map__for_each_idx(idx, evsel__cpus(evsel)) {
			*perf_counts(evsel->counts, idx, thread) =
				*perf_counts(evsel->prev_raw_counts, idx, thread);
		}
	}

	evsel->counts->aggr = evsel->prev_raw_counts->aggr;
}

void evlist__copy_prev_raw_counts(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		evsel__copy_prev_raw_counts(evsel);
}

void evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
{
	struct evsel *evsel;

	/*
	 * To collect the overall statistics for interval mode,
	 * we copy the counts from evsel->prev_raw_counts to
	 * evsel->counts. The perf_stat_process_counter creates
	 * aggr values from per cpu values, but the per cpu values
	 * are 0 for AGGR_GLOBAL. So we use a trick that saves the
	 * previous aggr value to the first member of perf_counts,
	 * then aggr calculation in process_counter_values can work
	 * correctly.
	 */
	evlist__for_each_entry(evlist, evsel) {
		*perf_counts(evsel->prev_raw_counts, 0, 0) =
			evsel->prev_raw_counts->aggr;
	}
}

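/*
 * Hash/equality callbacks for the per-package hashmap below. Keys are
 * u64 values laid out as (die_id << 32 | socket_id); the hash simply
 * takes the low 32 bits, i.e. the socket id.
 */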
static size_t pkg_id_hash(const void *__key, void *ctx __maybe_unused)
{
	uint64_t *key = (uint64_t *) __key;

	return *key & 0xffffffff;
}

static bool pkg_id_equal(const void *__key1, const void *__key2,
			 void *ctx __maybe_unused)
{
	uint64_t *key1 = (uint64_t *) __key1;
	uint64_t *key2 = (uint64_t *) __key2;

	return *key1 == *key2;
}

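/*
 * For a per-package event only one CPU per (socket, die) pair may
 * contribute a value. Sets *skip to true when another CPU of the same
 * package has already been counted; returns a negative value on error.
 */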
static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals,
			 int cpu_map_idx, bool *skip)
{
	struct hashmap *mask = counter->per_pkg_mask;
	struct perf_cpu_map *cpus = evsel__cpus(counter);
	struct perf_cpu cpu = perf_cpu_map__cpu(cpus, cpu_map_idx);
	int s, d, ret = 0;
	uint64_t *key;

	*skip = false;

	if (!counter->per_pkg)
		return 0;

	if (perf_cpu_map__empty(cpus))
		return 0;

	if (!mask) {
		mask = hashmap__new(pkg_id_hash, pkg_id_equal, NULL);
		if (IS_ERR(mask))
			return -ENOMEM;

		counter->per_pkg_mask = mask;
	}

	/*
	 * we do not consider an event that has not run as a good
	 * instance to mark a package as used (skip=1). Otherwise
	 * we may run into a situation where the first CPU in a package
	 * is not running anything, yet the second is, and this function
	 * would mark the package as used after the first CPU and would
	 * not read the values from the second CPU.
	 */
	if (!(vals->run && vals->ena))
		return 0;

	s = cpu__get_socket_id(cpu);
	if (s < 0)
		return -1;

	/*
	 * On multi-die system, die_id > 0. On no-die system, die_id = 0.
	 * We use hashmap(socket, die) to check the used socket+die pair.
	 */
	d = cpu__get_die_id(cpu);
	if (d < 0)
		return -1;

	key = malloc(sizeof(*key));
	if (!key)
		return -ENOMEM;

	*key = (uint64_t)d << 32 | s;
	if (hashmap__find(mask, (void *)key, NULL)) {
		*skip = true;
		free(key);
	} else
		ret = hashmap__add(mask, (void *)key, (void *)1);

	return ret;
}

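/*
 * Fold one (cpu, thread) counter value into the configured aggregation
 * mode: the per-cpu style modes scale the value in place and update the
 * shadow stats where applicable, while AGGR_GLOBAL accumulates into
 * evsel->counts->aggr for later processing.
 */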
static int
process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
		       int cpu_map_idx, int thread,
		       struct perf_counts_values *count)
{
	struct perf_counts_values *aggr = &evsel->counts->aggr;
	static struct perf_counts_values zero;
	bool skip = false;

	if (check_per_pkg(evsel, count, cpu_map_idx, &skip)) {
		pr_err("failed to read per-pkg counter\n");
		return -1;
	}

	if (skip)
		count = &zero;

	switch (config->aggr_mode) {
	case AGGR_THREAD:
	case AGGR_CORE:
	case AGGR_DIE:
	case AGGR_SOCKET:
	case AGGR_NODE:
	case AGGR_NONE:
		if (!evsel->snapshot)
			evsel__compute_deltas(evsel, cpu_map_idx, thread, count);
		perf_counts_values__scale(count, config->scale, NULL);
		if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) {
			perf_stat__update_shadow_stats(evsel, count->val,
						       cpu_map_idx, &rt_stat);
		}

		if (config->aggr_mode == AGGR_THREAD) {
			if (config->stats)
				perf_stat__update_shadow_stats(evsel,
					count->val, 0, &config->stats[thread]);
			else
				perf_stat__update_shadow_stats(evsel,
					count->val, 0, &rt_stat);
		}
		break;
	case AGGR_GLOBAL:
		aggr->val += count->val;
		aggr->ena += count->ena;
		aggr->run += count->run;
		break;
	case AGGR_UNSET:
	case AGGR_MAX:
	default:
		break;
	}

	return 0;
}

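/* Walk every (cpu index, thread) pair of the counter's maps. */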
static int process_counter_maps(struct perf_stat_config *config,
				struct evsel *counter)
{
	int nthreads = perf_thread_map__nr(counter->core.threads);
	int ncpus = evsel__nr_cpus(counter);
	int idx, thread;

	if (counter->core.system_wide)
		nthreads = 1;

	for (thread = 0; thread < nthreads; thread++) {
		for (idx = 0; idx < ncpus; idx++) {
			if (process_counter_values(config, counter, idx, thread,
						   perf_counts(counter->counts, idx, thread)))
				return -1;
		}
	}

	return 0;
}

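/*
 * Process all values of a counter for one read: reset the aggregate,
 * fold in the per-cpu/thread values and, for AGGR_GLOBAL, scale the
 * aggregate and feed it to the run statistics and shadow stats.
 */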
int perf_stat_process_counter(struct perf_stat_config *config,
			      struct evsel *counter)
{
	struct perf_counts_values *aggr = &counter->counts->aggr;
	struct perf_stat_evsel *ps = counter->stats;
	u64 *count = counter->counts->aggr.values;
	int i, ret;

	aggr->val = aggr->ena = aggr->run = 0;

	if (counter->per_pkg)
		evsel__zero_per_pkg(counter);

	ret = process_counter_maps(config, counter);
	if (ret)
		return ret;

	if (config->aggr_mode != AGGR_GLOBAL)
		return 0;

	if (!counter->snapshot)
		evsel__compute_deltas(counter, -1, -1, aggr);
	perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);

	for (i = 0; i < 3; i++)
		update_stats(&ps->res_stats[i], count[i]);

	if (verbose > 0) {
		fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
			evsel__name(counter), count[0], count[1], count[2]);
	}

	/*
	 * Save the full runtime - to allow normalization during printout:
	 */
	perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat);

	return 0;
}

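/*
 * Handle a stat event (e.g. from a 'perf stat record' session): resolve
 * the evsel by id and store the value at the matching (cpu, thread) slot.
 */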
int perf_event__process_stat_event(struct perf_session *session,
				   union perf_event *event)
{
	struct perf_counts_values count, *ptr;
	struct perf_record_stat *st = &event->stat;
	struct evsel *counter;
	int cpu_map_idx;

	count.val = st->val;
	count.ena = st->ena;
	count.run = st->run;

	counter = evlist__id2evsel(session->evlist, st->id);
	if (!counter) {
		pr_err("Failed to resolve counter for stat event.\n");
		return -EINVAL;
	}
	cpu_map_idx = perf_cpu_map__idx(evsel__cpus(counter), (struct perf_cpu){.cpu = st->cpu});
	if (cpu_map_idx == -1) {
		pr_err("Invalid CPU %d for event %s.\n", st->cpu, evsel__name(counter));
		return -EINVAL;
	}
	ptr = perf_counts(counter->counts, cpu_map_idx, st->thread);
	if (ptr == NULL) {
		pr_err("Failed to find perf count for CPU %d thread %d on event %s.\n",
		       st->cpu, st->thread, evsel__name(counter));
		return -EINVAL;
	}
	*ptr = count;
	counter->supported = true;
	return 0;
}

size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp)
{
	struct perf_record_stat *st = (struct perf_record_stat *)event;
	size_t ret;

	ret = fprintf(fp, "\n... id %" PRI_lu64 ", cpu %d, thread %d\n",
		      st->id, st->cpu, st->thread);
	ret += fprintf(fp, "... value %" PRI_lu64 ", enabled %" PRI_lu64 ", running %" PRI_lu64 "\n",
		       st->val, st->ena, st->run);

	return ret;
}

size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp)
{
	struct perf_record_stat_round *rd = (struct perf_record_stat_round *)event;
	size_t ret;

	ret = fprintf(fp, "\n... time %" PRI_lu64 ", type %s\n", rd->time,
		      rd->type == PERF_STAT_ROUND_TYPE__FINAL ? "FINAL" : "INTERVAL");

	return ret;
}

size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
{
	struct perf_stat_config sc;
	size_t ret;

	perf_event__read_stat_config(&sc, &event->stat_config);

	ret = fprintf(fp, "\n");
	ret += fprintf(fp, "... aggr_mode %d\n", sc.aggr_mode);
	ret += fprintf(fp, "... scale %d\n", sc.scale);
	ret += fprintf(fp, "... interval %u\n", sc.interval);

	return ret;
}

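/*
 * Set up the attr for counting and open the event: enable group reads
 * for non-trivial groups, clear sampling state, apply user/kernel
 * filtering and the delayed-enable policy, then open the event either
 * per cpu or per thread depending on the target.
 */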
int create_perf_stat_counter(struct evsel *evsel,
			     struct perf_stat_config *config,
			     struct target *target,
			     int cpu_map_idx)
{
	struct perf_event_attr *attr = &evsel->core.attr;
	struct evsel *leader = evsel__leader(evsel);

	attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
			    PERF_FORMAT_TOTAL_TIME_RUNNING;

	/*
	 * The event is part of non trivial group, let's enable
	 * the group read (for leader) and ID retrieval for all
	 * members.
	 */
	if (leader->core.nr_members > 1)
		attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;

	attr->inherit = !config->no_inherit && list_empty(&evsel->bpf_counter_list);

	/*
	 * Some events get initialized with sample_(period/type) set,
	 * like tracepoints. Clear it up for counting.
	 */
	attr->sample_period = 0;

	if (config->identifier)
		attr->sample_type = PERF_SAMPLE_IDENTIFIER;

	if (config->all_user) {
		attr->exclude_kernel = 1;
		attr->exclude_user   = 0;
	}

	if (config->all_kernel) {
		attr->exclude_kernel = 0;
		attr->exclude_user   = 1;
	}

	/*
	 * Disabling all counters initially, they will be enabled
	 * either manually by us or by kernel via enable_on_exec
	 * set later.
	 */
	if (evsel__is_group_leader(evsel)) {
		attr->disabled = 1;

		/*
		 * In case of initial_delay we enable tracee
		 * events manually.
		 */
		if (target__none(target) && !config->initial_delay)
			attr->enable_on_exec = 1;
	}

	if (target__has_cpu(target) && !target__has_per_thread(target))
		return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu_map_idx);

	return evsel__open_per_thread(evsel, evsel->core.threads);
}