#include <math.h>
#include <stdio.h>
#include "evsel.h"
#include "stat.h"
#include "color.h"
#include "debug.h"
#include "pmu.h"
#include "rblist.h"
#include "evlist.h"
#include "expr.h"
#include "metricgroup.h"
#include "cgroup.h"
#include "units.h"
#include <linux/zalloc.h>
#include "iostat.h"
struct runtime_stat rt_stat;
struct stats walltime_nsecs_stats;
struct rusage_stats ru_stats;

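/*
 * One saved_value node per (event, cpu, context, cgroup) combination,
 * kept in an rblist so shadow values can be looked up when printing.
 */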
struct saved_value {
	struct rb_node rb_node;
	struct evsel *evsel;
	enum stat_type type;
	int ctx;
	int cpu_map_idx;
	struct cgroup *cgrp;
	struct runtime_stat *stat;
	struct stats stats;
	u64 metric_total;
	int metric_other;
};

static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
{
	struct saved_value *a = container_of(rb_node,
					     struct saved_value,
					     rb_node);
	const struct saved_value *b = entry;

	if (a->cpu_map_idx != b->cpu_map_idx)
		return a->cpu_map_idx - b->cpu_map_idx;

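	/*
	 * The key is (cpu_map_idx, type, ctx, cgrp) plus either the evsel
	 * (generic metric values) or the runtime_stat pointer (per-thread
	 * shadow stats), compared in that order below.
	 */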
	if (a->type != b->type)
		return a->type - b->type;

	if (a->ctx != b->ctx)
		return a->ctx - b->ctx;

	if (a->cgrp != b->cgrp)
		return (char *)a->cgrp < (char *)b->cgrp ? -1 : +1;

	if (a->evsel == NULL && b->evsel == NULL) {
		if (a->stat == b->stat)
			return 0;

		if ((char *)a->stat < (char *)b->stat)
			return -1;

		return 1;
	}

	if (a->evsel == b->evsel)
		return 0;
	if ((char *)a->evsel < (char *)b->evsel)
		return -1;
	return +1;
}

static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
				       const void *entry)
{
	struct saved_value *nd = malloc(sizeof(struct saved_value));

	if (!nd)
		return NULL;
	memcpy(nd, entry, sizeof(struct saved_value));
	return &nd->rb_node;
}

static void saved_value_delete(struct rblist *rblist __maybe_unused,
			       struct rb_node *rb_node)
{
	struct saved_value *v;

	BUG_ON(!rb_node);
	v = container_of(rb_node, struct saved_value, rb_node);
	free(v);
}

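/*
 * Look up (and optionally create) the saved_value node for the given
 * key; @evsel is NULL when looking up a shadow stat type.
 */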
static struct saved_value *saved_value_lookup(struct evsel *evsel,
					      int cpu_map_idx,
					      bool create,
					      enum stat_type type,
					      int ctx,
					      struct runtime_stat *st,
					      struct cgroup *cgrp)
{
	struct rblist *rblist;
	struct rb_node *nd;
	struct saved_value dm = {
		.cpu_map_idx = cpu_map_idx,
		.evsel = evsel,
		.type = type,
		.ctx = ctx,
		.stat = st,
		.cgrp = cgrp,
	};

	rblist = &st->value_list;
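	/* Don't use the context info for clock events. */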
	if (type == STAT_NSECS)
		dm.ctx = 0;

	nd = rblist__find(rblist, &dm);
	if (nd)
		return container_of(nd, struct saved_value, rb_node);
	if (create) {
		rblist__add_node(rblist, &dm);
		nd = rblist__find(rblist, &dm);
		if (nd)
			return container_of(nd, struct saved_value, rb_node);
	}
	return NULL;
}

void runtime_stat__init(struct runtime_stat *st)
{
	struct rblist *rblist = &st->value_list;

	rblist__init(rblist);
	rblist->node_cmp = saved_value_cmp;
	rblist->node_new = saved_value_new;
	rblist->node_delete = saved_value_delete;
}

void runtime_stat__exit(struct runtime_stat *st)
{
	rblist__exit(&st->value_list);
}

void perf_stat__init_shadow_stats(void)
{
	runtime_stat__init(&rt_stat);
}

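/* Encode the evsel's exclude_* attributes as a context bitmap. */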
static int evsel_context(struct evsel *evsel)
{
	int ctx = 0;

	if (evsel->core.attr.exclude_kernel)
		ctx |= CTX_BIT_KERNEL;
	if (evsel->core.attr.exclude_user)
		ctx |= CTX_BIT_USER;
	if (evsel->core.attr.exclude_hv)
		ctx |= CTX_BIT_HV;
	if (evsel->core.attr.exclude_host)
		ctx |= CTX_BIT_HOST;
	if (evsel->core.attr.exclude_idle)
		ctx |= CTX_BIT_IDLE;

	return ctx;
}

static void reset_stat(struct runtime_stat *st)
{
	struct rblist *rblist;
	struct rb_node *pos, *next;

	rblist = &st->value_list;
	next = rb_first_cached(&rblist->entries);
	while (next) {
		pos = next;
		next = rb_next(pos);
		memset(&container_of(pos, struct saved_value, rb_node)->stats,
		       0,
		       sizeof(struct stats));
	}
}

void perf_stat__reset_shadow_stats(void)
{
	reset_stat(&rt_stat);
	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
	memset(&ru_stats, 0, sizeof(ru_stats));
}

void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
{
	reset_stat(st);
}

struct runtime_stat_data {
	int ctx;
	struct cgroup *cgrp;
};

static void update_runtime_stat(struct runtime_stat *st,
				enum stat_type type,
				int cpu_map_idx, u64 count,
				struct runtime_stat_data *rsd)
{
	struct saved_value *v = saved_value_lookup(NULL, cpu_map_idx, true, type,
						   rsd->ctx, st, rsd->cgrp);

	if (v)
		update_stats(&v->stats, count);
}

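/*
 * Update the various tracking values we maintain so the output can
 * carry more semantic information, such as miss/hit ratios and
 * instruction rates.
 */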
void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
				    int cpu_map_idx, struct runtime_stat *st)
{
	u64 count_ns = count;
	struct saved_value *v;
	struct runtime_stat_data rsd = {
		.ctx = evsel_context(counter),
		.cgrp = counter->cgrp,
	};

	count *= counter->scale;

	if (evsel__is_clock(counter))
		update_runtime_stat(st, STAT_NSECS, cpu_map_idx, count_ns, &rsd);
	else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		update_runtime_stat(st, STAT_CYCLES, cpu_map_idx, count, &rsd);
	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
		update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu_map_idx, count, &rsd);
	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
		update_runtime_stat(st, STAT_TRANSACTION, cpu_map_idx, count, &rsd);
	else if (perf_stat_evsel__is(counter, ELISION_START))
		update_runtime_stat(st, STAT_ELISION, cpu_map_idx, count, &rsd);
	else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
		update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
				    cpu_map_idx, count, &rsd);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
				    cpu_map_idx, count, &rsd);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
				    cpu_map_idx, count, &rsd);
	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
		update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
				    cpu_map_idx, count, &rsd);
	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
		update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
				    cpu_map_idx, count, &rsd);
	else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING))
		update_runtime_stat(st, STAT_TOPDOWN_RETIRING,
				    cpu_map_idx, count, &rsd);
	else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC))
		update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC,
				    cpu_map_idx, count, &rsd);
	else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND))
		update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND,
				    cpu_map_idx, count, &rsd);
	else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND))
		update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND,
				    cpu_map_idx, count, &rsd);
	else if (perf_stat_evsel__is(counter, TOPDOWN_HEAVY_OPS))
		update_runtime_stat(st, STAT_TOPDOWN_HEAVY_OPS,
				    cpu_map_idx, count, &rsd);
	else if (perf_stat_evsel__is(counter, TOPDOWN_BR_MISPREDICT))
		update_runtime_stat(st, STAT_TOPDOWN_BR_MISPREDICT,
				    cpu_map_idx, count, &rsd);
	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_LAT))
		update_runtime_stat(st, STAT_TOPDOWN_FETCH_LAT,
				    cpu_map_idx, count, &rsd);
	else if (perf_stat_evsel__is(counter, TOPDOWN_MEM_BOUND))
		update_runtime_stat(st, STAT_TOPDOWN_MEM_BOUND,
				    cpu_map_idx, count, &rsd);
	else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
				    cpu_map_idx, count, &rsd);
	else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
				    cpu_map_idx, count, &rsd);
	else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		update_runtime_stat(st, STAT_BRANCHES, cpu_map_idx, count, &rsd);
	else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		update_runtime_stat(st, STAT_CACHEREFS, cpu_map_idx, count, &rsd);
	else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		update_runtime_stat(st, STAT_L1_DCACHE, cpu_map_idx, count, &rsd);
	else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		update_runtime_stat(st, STAT_L1_ICACHE, cpu_map_idx, count, &rsd);
	else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		update_runtime_stat(st, STAT_LL_CACHE, cpu_map_idx, count, &rsd);
	else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		update_runtime_stat(st, STAT_DTLB_CACHE, cpu_map_idx, count, &rsd);
	else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_runtime_stat(st, STAT_ITLB_CACHE, cpu_map_idx, count, &rsd);
	else if (perf_stat_evsel__is(counter, SMI_NUM))
		update_runtime_stat(st, STAT_SMI_NUM, cpu_map_idx, count, &rsd);
	else if (perf_stat_evsel__is(counter, APERF))
		update_runtime_stat(st, STAT_APERF, cpu_map_idx, count, &rsd);

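	/*
	 * Generic metric events keep their own stats; counts from
	 * duplicated events are folded into their metric leader so the
	 * expression sees a single combined value.
	 */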
	if (counter->collect_stat) {
		v = saved_value_lookup(counter, cpu_map_idx, true, STAT_NONE, 0, st,
				       rsd.cgrp);
		update_stats(&v->stats, count);
		if (counter->metric_leader)
			v->metric_total += count;
	} else if (counter->metric_leader) {
		v = saved_value_lookup(counter->metric_leader,
				       cpu_map_idx, true, STAT_NONE, 0, st, rsd.cgrp);
		v->metric_total += count;
		v->metric_other++;
	}
}

enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};

static const char *get_ratio_color(enum grc_type type, double ratio)
{
	static const double grc_table[GRC_MAX_NR][3] = {
		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
		[GRC_CACHE_MISSES]	= { 20.0, 10.0, 5.0 },
	};
	const char *color = PERF_COLOR_NORMAL;

	if (ratio > grc_table[type][0])
		color = PERF_COLOR_RED;
	else if (ratio > grc_table[type][1])
		color = PERF_COLOR_MAGENTA;
	else if (ratio > grc_table[type][2])
		color = PERF_COLOR_YELLOW;

	return color;
}

static struct evsel *perf_stat__find_event(struct evlist *evsel_list,
					   const char *name)
{
	struct evsel *c2;

	evlist__for_each_entry(evsel_list, c2) {
		if (!strcasecmp(c2->name, name) && !c2->collect_stat)
			return c2;
	}
	return NULL;
}

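/*
 * For each event carrying a MetricExpr, resolve the other events the
 * expression references and link them via counter->metric_events.
 */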
void perf_stat__collect_metric_expr(struct evlist *evsel_list)
{
	struct evsel *counter, *leader, **metric_events, *oc;
	bool found;
	struct expr_parse_ctx *ctx;
	struct hashmap_entry *cur;
	size_t bkt;
	int i;

	ctx = expr__ctx_new();
	if (!ctx) {
		pr_debug("expr__ctx_new failed");
		return;
	}
	evlist__for_each_entry(evsel_list, counter) {
		bool invalid = false;

		leader = evsel__leader(counter);
		if (!counter->metric_expr)
			continue;

		expr__ctx_clear(ctx);
		metric_events = counter->metric_events;
		if (!metric_events) {
			if (expr__find_ids(counter->metric_expr,
					   counter->name,
					   ctx) < 0)
				continue;

			metric_events = calloc(hashmap__size(ctx->ids) + 1,
					       sizeof(struct evsel *));
			if (!metric_events) {
				expr__ctx_free(ctx);
				return;
			}
			counter->metric_events = metric_events;
		}

		i = 0;
		hashmap__for_each_entry(ctx->ids, cur, bkt) {
			const char *metric_name = (const char *)cur->key;

			found = false;
			if (leader) {
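				/* Search within the leader's group first. */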
				for_each_group_member(oc, leader) {
					if (!strcasecmp(oc->name,
							metric_name) &&
					    !oc->collect_stat) {
						found = true;
						break;
					}
				}
			}
			if (!found) {
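				/* Fall back to searching the whole evlist. */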
				oc = perf_stat__find_event(evsel_list,
							   metric_name);
			}
			if (!oc) {
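				/* Deduping against the last printed name handles duplicated PMUs. */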
				static char *printed;

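				/*
				 * Adding the missing events automatically could
				 * create groups the kernel cannot schedule, so
				 * ask the user to add them instead.
				 */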
				if (!printed ||
				    strcasecmp(printed, metric_name)) {
					fprintf(stderr,
						"Add %s event to groups to get metric expression for %s\n",
						metric_name,
						counter->name);
					free(printed);
					printed = strdup(metric_name);
				}
				invalid = true;
				continue;
			}
			metric_events[i++] = oc;
			oc->collect_stat = true;
		}
		metric_events[i] = NULL;
		if (invalid) {
			free(metric_events);
			counter->metric_events = NULL;
			counter->metric_expr = NULL;
		}
	}
	expr__ctx_free(ctx);
}

static double runtime_stat_avg(struct runtime_stat *st,
			       enum stat_type type, int cpu_map_idx,
			       struct runtime_stat_data *rsd)
{
	struct saved_value *v;

	v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp);
	if (!v)
		return 0.0;

	return avg_stats(&v->stats);
}

static double runtime_stat_n(struct runtime_stat *st,
			     enum stat_type type, int cpu_map_idx,
			     struct runtime_stat_data *rsd)
{
	struct saved_value *v;

	v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp);
	if (!v)
		return 0.0;

	return v->stats.n;
}

static void print_stalled_cycles_frontend(struct perf_stat_config *config,
					  int cpu_map_idx, double avg,
					  struct perf_stat_output_ctx *out,
					  struct runtime_stat *st,
					  struct runtime_stat_data *rsd)
{
	double total, ratio = 0.0;
	const char *color;

	total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

	if (ratio)
		out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle",
				  ratio);
	else
		out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0);
}

static void print_stalled_cycles_backend(struct perf_stat_config *config,
					 int cpu_map_idx, double avg,
					 struct perf_stat_output_ctx *out,
					 struct runtime_stat *st,
					 struct runtime_stat_data *rsd)
{
	double total, ratio = 0.0;
	const char *color;

	total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
}

static void print_branch_misses(struct perf_stat_config *config,
				int cpu_map_idx, double avg,
				struct perf_stat_output_ctx *out,
				struct runtime_stat *st,
				struct runtime_stat_data *rsd)
{
	double total, ratio = 0.0;
	const char *color;

	total = runtime_stat_avg(st, STAT_BRANCHES, cpu_map_idx, rsd);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio);
}

static void print_l1_dcache_misses(struct perf_stat_config *config,
				   int cpu_map_idx, double avg,
				   struct perf_stat_output_ctx *out,
				   struct runtime_stat *st,
				   struct runtime_stat_data *rsd)
{
	double total, ratio = 0.0;
	const char *color;

	total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu_map_idx, rsd);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache accesses", ratio);
}

static void print_l1_icache_misses(struct perf_stat_config *config,
				   int cpu_map_idx, double avg,
				   struct perf_stat_output_ctx *out,
				   struct runtime_stat *st,
				   struct runtime_stat_data *rsd)
{
	double total, ratio = 0.0;
	const char *color;

	total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu_map_idx, rsd);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache accesses", ratio);
}

static void print_dtlb_cache_misses(struct perf_stat_config *config,
				    int cpu_map_idx, double avg,
				    struct perf_stat_output_ctx *out,
				    struct runtime_stat *st,
				    struct runtime_stat_data *rsd)
{
	double total, ratio = 0.0;
	const char *color;

	total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu_map_idx, rsd);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache accesses", ratio);
}

static void print_itlb_cache_misses(struct perf_stat_config *config,
				    int cpu_map_idx, double avg,
				    struct perf_stat_output_ctx *out,
				    struct runtime_stat *st,
				    struct runtime_stat_data *rsd)
{
	double total, ratio = 0.0;
	const char *color;

	total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu_map_idx, rsd);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache accesses", ratio);
}

static void print_ll_cache_misses(struct perf_stat_config *config,
				  int cpu_map_idx, double avg,
				  struct perf_stat_output_ctx *out,
				  struct runtime_stat *st,
				  struct runtime_stat_data *rsd)
{
	double total, ratio = 0.0;
	const char *color;

	total = runtime_stat_avg(st, STAT_LL_CACHE, cpu_map_idx, rsd);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache accesses", ratio);
}

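/*
 * "TopDown" CPU pipeline bottleneck breakdown, following the method in
 * Yasin, "A Top-Down Method for Performance Analysis and Counters
 * Architecture", ISPASS 2014. Pipeline slots are split into retiring,
 * bad speculation, frontend bound and backend bound, derived from the
 * topdown-* events handled below.
 */

/*
 * Counter multiplexing can leave the individual topdown counts
 * slightly inconsistent, so treat small negative ratios as zero.
 */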
static double sanitize_val(double x)
{
	if (x < 0 && x >= -0.02)
		return 0.0;
	return x;
}

static double td_total_slots(int cpu_map_idx, struct runtime_stat *st,
			     struct runtime_stat_data *rsd)
{
	return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu_map_idx, rsd);
}

static double td_bad_spec(int cpu_map_idx, struct runtime_stat *st,
			  struct runtime_stat_data *rsd)
{
	double bad_spec = 0;
	double total_slots;
	double total;

	total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu_map_idx, rsd) -
		runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu_map_idx, rsd) +
		runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu_map_idx, rsd);

	total_slots = td_total_slots(cpu_map_idx, st, rsd);
	if (total_slots)
		bad_spec = total / total_slots;
	return sanitize_val(bad_spec);
}

static double td_retiring(int cpu_map_idx, struct runtime_stat *st,
			  struct runtime_stat_data *rsd)
{
	double retiring = 0;
	double total_slots = td_total_slots(cpu_map_idx, st, rsd);
	double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
					    cpu_map_idx, rsd);

	if (total_slots)
		retiring = ret_slots / total_slots;
	return retiring;
}

static double td_fe_bound(int cpu_map_idx, struct runtime_stat *st,
			  struct runtime_stat_data *rsd)
{
	double fe_bound = 0;
	double total_slots = td_total_slots(cpu_map_idx, st, rsd);
	double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
					    cpu_map_idx, rsd);

	if (total_slots)
		fe_bound = fetch_bub / total_slots;
	return fe_bound;
}

static double td_be_bound(int cpu_map_idx, struct runtime_stat *st,
			  struct runtime_stat_data *rsd)
{
	double sum = (td_fe_bound(cpu_map_idx, st, rsd) +
		      td_bad_spec(cpu_map_idx, st, rsd) +
		      td_retiring(cpu_map_idx, st, rsd));
	if (sum == 0)
		return 0;
	return sanitize_val(1.0 - sum);
}
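/*
 * Fraction of @type among the four top-down level 1 metrics
 * (retiring, frontend bound, backend bound, bad speculation).
 */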
static double td_metric_ratio(int cpu_map_idx, enum stat_type type,
			      struct runtime_stat *stat,
			      struct runtime_stat_data *rsd)
{
	double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) +
		runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) +
		runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) +
		runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd);
	double d = runtime_stat_avg(stat, type, cpu_map_idx, rsd);

	if (sum)
		return d / sum;
	return 0;
}

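/*
 * Consider the level 1 breakdown usable once at least two of the four
 * metrics have been measured, since a single one may legitimately be
 * zero.
 */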
static bool full_td(int cpu_map_idx, struct runtime_stat *stat,
		    struct runtime_stat_data *rsd)
{
	int c = 0;

	if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) > 0)
		c++;
	if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) > 0)
		c++;
	if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) > 0)
		c++;
	if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd) > 0)
		c++;
	return c >= 2;
}

static void print_smi_cost(struct perf_stat_config *config, int cpu_map_idx,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st,
			   struct runtime_stat_data *rsd)
{
	double smi_num, aperf, cycles, cost = 0.0;
	const char *color = NULL;

	smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu_map_idx, rsd);
	aperf = runtime_stat_avg(st, STAT_APERF, cpu_map_idx, rsd);
	cycles = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);

	if ((cycles == 0) || (aperf == 0))
		return;

	if (smi_num)
		cost = (aperf - cycles) / aperf * 100.00;

	if (cost > 10)
		color = PERF_COLOR_RED;
	out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
	out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}

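/*
 * Feed each metric event's measured value into the expression parse
 * context; returns the number of events processed or -ENOMEM.
 */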
static int prepare_metric(struct evsel **metric_events,
			  struct metric_ref *metric_refs,
			  struct expr_parse_ctx *pctx,
			  int cpu_map_idx,
			  struct runtime_stat *st)
{
	double scale;
	char *n;
	int i, j, ret;

	for (i = 0; metric_events[i]; i++) {
		struct saved_value *v;
		struct stats *stats;
		u64 metric_total = 0;
		int source_count;

		if (evsel__is_tool(metric_events[i])) {
			source_count = 1;
			switch (metric_events[i]->tool_event) {
			case PERF_TOOL_DURATION_TIME:
				stats = &walltime_nsecs_stats;
				scale = 1e-9;
				break;
			case PERF_TOOL_USER_TIME:
				stats = &ru_stats.ru_utime_usec_stat;
				scale = 1e-6;
				break;
			case PERF_TOOL_SYSTEM_TIME:
				stats = &ru_stats.ru_stime_usec_stat;
				scale = 1e-6;
				break;
			case PERF_TOOL_NONE:
				pr_err("Invalid tool event 'none'");
				abort();
			case PERF_TOOL_MAX:
				pr_err("Invalid tool event 'max'");
				abort();
			default:
				pr_err("Unknown tool event '%s'", evsel__name(metric_events[i]));
				abort();
			}
		} else {
			v = saved_value_lookup(metric_events[i], cpu_map_idx, false,
					       STAT_NONE, 0, st,
					       metric_events[i]->cgrp);
			if (!v)
				break;
			stats = &v->stats;
			scale = 1.0;
			source_count = evsel__source_count(metric_events[i]);

			if (v->metric_other)
				metric_total = v->metric_total;
		}
		n = strdup(evsel__metric_id(metric_events[i]));
		if (!n)
			return -ENOMEM;

		expr__add_id_val_source_count(pctx, n,
					      metric_total ? : avg_stats(stats) * scale,
					      source_count);
	}

	for (j = 0; metric_refs && metric_refs[j].metric_name; j++) {
		ret = expr__add_ref(pctx, &metric_refs[j]);
		if (ret)
			return ret;
	}

	return i;
}

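/*
 * Evaluate a metric expression against the current shadow stats and
 * print the result, applying the metric's unit scaling when present.
 */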
static void generic_metric(struct perf_stat_config *config,
			   const char *metric_expr,
			   struct evsel **metric_events,
			   struct metric_ref *metric_refs,
			   char *name,
			   const char *metric_name,
			   const char *metric_unit,
			   int runtime,
			   int cpu_map_idx,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	print_metric_t print_metric = out->print_metric;
	struct expr_parse_ctx *pctx;
	double ratio, scale;
	int i;
	void *ctxp = out->ctx;

	pctx = expr__ctx_new();
	if (!pctx)
		return;

	pctx->runtime = runtime;
	i = prepare_metric(metric_events, metric_refs, pctx, cpu_map_idx, st);
	if (i < 0) {
		expr__ctx_free(pctx);
		return;
	}
	if (!metric_events[i]) {
		if (expr__parse(&ratio, pctx, metric_expr) == 0) {
			char *unit;
			char metric_bf[64];

			if (metric_unit && metric_name) {
				if (perf_pmu__convert_scale(metric_unit,
							    &unit, &scale) >= 0) {
					ratio *= scale;
				}
				if (strstr(metric_expr, "?"))
					scnprintf(metric_bf, sizeof(metric_bf),
						  "%s %s_%d", unit, metric_name, runtime);
				else
					scnprintf(metric_bf, sizeof(metric_bf),
						  "%s %s", unit, metric_name);

				print_metric(config, ctxp, NULL, "%8.1f",
					     metric_bf, ratio);
			} else {
				print_metric(config, ctxp, NULL, "%8.2f",
					     metric_name ?
					     metric_name :
					     out->force_header ? name : "",
					     ratio);
			}
		} else {
			print_metric(config, ctxp, NULL, NULL,
				     out->force_header ?
				     (metric_name ? metric_name : name) : "", 0);
		}
	} else {
		print_metric(config, ctxp, NULL, NULL,
			     out->force_header ?
			     (metric_name ? metric_name : name) : "", 0);
	}

	expr__ctx_free(pctx);
}

double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct runtime_stat *st)
{
	struct expr_parse_ctx *pctx;
	double ratio = 0.0;

	pctx = expr__ctx_new();
	if (!pctx)
		return NAN;

	if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, cpu_map_idx, st) < 0)
		goto out;

	if (expr__parse(&ratio, pctx, mexp->metric_expr))
		ratio = 0.0;

out:
	expr__ctx_free(pctx);
	return ratio;
}

void perf_stat__print_shadow_stats(struct perf_stat_config *config,
				   struct evsel *evsel,
				   double avg, int cpu_map_idx,
				   struct perf_stat_output_ctx *out,
				   struct rblist *metric_events,
				   struct runtime_stat *st)
{
	void *ctxp = out->ctx;
	print_metric_t print_metric = out->print_metric;
	double total, ratio = 0.0, total2;
	const char *color = NULL;
	struct runtime_stat_data rsd = {
		.ctx = evsel_context(evsel),
		.cgrp = evsel->cgrp,
	};
	struct metric_event *me;
	int num = 1;

	if (config->iostat_run) {
		iostat_print_metric(config, evsel, out);
	} else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);

		if (total) {
			ratio = avg / total;
			print_metric(config, ctxp, NULL, "%7.2f ",
				     "insn per cycle", ratio);
		} else {
			print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
		}

		total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu_map_idx, &rsd);

		total = max(total, runtime_stat_avg(st,
						    STAT_STALLED_CYCLES_BACK,
						    cpu_map_idx, &rsd));

		if (total && avg) {
			out->new_line(config, ctxp);
			ratio = total / avg;
			print_metric(config, ctxp, NULL, "%7.2f ",
				     "stalled cycles per insn",
				     ratio);
		}
	} else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
		if (runtime_stat_n(st, STAT_BRANCHES, cpu_map_idx, &rsd) != 0)
			print_branch_misses(config, cpu_map_idx, avg, out, st, &rsd);
		else
			print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config == (PERF_COUNT_HW_CACHE_L1D |
					    ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					    ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_L1_DCACHE, cpu_map_idx, &rsd) != 0)
			print_l1_dcache_misses(config, cpu_map_idx, avg, out, st, &rsd);
		else
			print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config == (PERF_COUNT_HW_CACHE_L1I |
					    ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					    ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_L1_ICACHE, cpu_map_idx, &rsd) != 0)
			print_l1_icache_misses(config, cpu_map_idx, avg, out, st, &rsd);
		else
			print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config == (PERF_COUNT_HW_CACHE_DTLB |
					    ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					    ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu_map_idx, &rsd) != 0)
			print_dtlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
		else
			print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config == (PERF_COUNT_HW_CACHE_ITLB |
					    ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					    ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu_map_idx, &rsd) != 0)
			print_itlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
		else
			print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config == (PERF_COUNT_HW_CACHE_LL |
					    ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					    ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_LL_CACHE, cpu_map_idx, &rsd) != 0)
			print_ll_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
		else
			print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0);
	} else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
		total = runtime_stat_avg(st, STAT_CACHEREFS, cpu_map_idx, &rsd);

		if (total)
			ratio = avg * 100 / total;

		if (runtime_stat_n(st, STAT_CACHEREFS, cpu_map_idx, &rsd) != 0)
			print_metric(config, ctxp, NULL, "%8.3f %%",
				     "of all cache refs", ratio);
		else
			print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
	} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(config, cpu_map_idx, avg, out, st, &rsd);
	} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
		print_stalled_cycles_backend(config, cpu_map_idx, avg, out, st, &rsd);
	} else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd);

		if (total) {
			ratio = avg / total;
			print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio);
		} else {
			print_metric(config, ctxp, NULL, NULL, "GHz", 0);
		}
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
		total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);

		if (total)
			print_metric(config, ctxp, NULL,
				     "%7.2f%%", "transactional cycles",
				     100.0 * (avg / total));
		else
			print_metric(config, ctxp, NULL, NULL, "transactional cycles",
				     0);
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
		total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);
		total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);

		if (total2 < avg)
			total2 = avg;
		if (total)
			print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles",
				     100.0 * ((total2 - avg) / total));
		else
			print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);

		if (avg)
			ratio = total / avg;

		if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd) != 0)
			print_metric(config, ctxp, NULL, "%8.0f",
				     "cycles / transaction", ratio);
		else
			print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
				     0);
	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);

		if (avg)
			ratio = total / avg;

		print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio);
	} else if (evsel__is_clock(evsel)) {
		if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
			print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
				     avg / (ratio * evsel->scale));
		else
			print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
		double fe_bound = td_fe_bound(cpu_map_idx, st, &rsd);

		if (fe_bound > 0.2)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
			     fe_bound * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
		double retiring = td_retiring(cpu_map_idx, st, &rsd);

		if (retiring > 0.7)
			color = PERF_COLOR_GREEN;
		print_metric(config, ctxp, color, "%8.1f%%", "retiring",
			     retiring * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
		double bad_spec = td_bad_spec(cpu_map_idx, st, &rsd);

		if (bad_spec > 0.1)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
			     bad_spec * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
		double be_bound = td_be_bound(cpu_map_idx, st, &rsd);
		const char *name = "backend bound";
		static int have_recovery_bubbles = -1;
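		/* The CPU may not support topdown-recovery-bubbles. */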
		if (have_recovery_bubbles < 0)
			have_recovery_bubbles = pmu_have_event("cpu",
					"topdown-recovery-bubbles");
		if (!have_recovery_bubbles)
			name = "backend bound/bad spec";

		if (be_bound > 0.2)
			color = PERF_COLOR_RED;
		if (td_total_slots(cpu_map_idx, st, &rsd) > 0)
			print_metric(config, ctxp, color, "%8.1f%%", name,
				     be_bound * 100.);
		else
			print_metric(config, ctxp, NULL, NULL, name, 0);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) &&
		   full_td(cpu_map_idx, st, &rsd)) {
		double retiring = td_metric_ratio(cpu_map_idx,
						  STAT_TOPDOWN_RETIRING, st,
						  &rsd);
		if (retiring > 0.7)
			color = PERF_COLOR_GREEN;
		print_metric(config, ctxp, color, "%8.1f%%", "Retiring",
			     retiring * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) &&
		   full_td(cpu_map_idx, st, &rsd)) {
		double fe_bound = td_metric_ratio(cpu_map_idx,
						  STAT_TOPDOWN_FE_BOUND, st,
						  &rsd);
		if (fe_bound > 0.2)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "Frontend Bound",
			     fe_bound * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) &&
		   full_td(cpu_map_idx, st, &rsd)) {
		double be_bound = td_metric_ratio(cpu_map_idx,
						  STAT_TOPDOWN_BE_BOUND, st,
						  &rsd);
		if (be_bound > 0.2)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "Backend Bound",
			     be_bound * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) &&
		   full_td(cpu_map_idx, st, &rsd)) {
		double bad_spec = td_metric_ratio(cpu_map_idx,
						  STAT_TOPDOWN_BAD_SPEC, st,
						  &rsd);
		if (bad_spec > 0.1)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "Bad Speculation",
			     bad_spec * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_HEAVY_OPS) &&
		   full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
		double retiring = td_metric_ratio(cpu_map_idx,
						  STAT_TOPDOWN_RETIRING, st,
						  &rsd);
		double heavy_ops = td_metric_ratio(cpu_map_idx,
						   STAT_TOPDOWN_HEAVY_OPS, st,
						   &rsd);
		double light_ops = retiring - heavy_ops;

		if (retiring > 0.7 && heavy_ops > 0.1)
			color = PERF_COLOR_GREEN;
		print_metric(config, ctxp, color, "%8.1f%%", "Heavy Operations",
			     heavy_ops * 100.);
		if (retiring > 0.7 && light_ops > 0.6)
			color = PERF_COLOR_GREEN;
		else
			color = NULL;
		print_metric(config, ctxp, color, "%8.1f%%", "Light Operations",
			     light_ops * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_BR_MISPREDICT) &&
		   full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
		double bad_spec = td_metric_ratio(cpu_map_idx,
						  STAT_TOPDOWN_BAD_SPEC, st,
						  &rsd);
		double br_mis = td_metric_ratio(cpu_map_idx,
						STAT_TOPDOWN_BR_MISPREDICT, st,
						&rsd);
		double m_clears = bad_spec - br_mis;

		if (bad_spec > 0.1 && br_mis > 0.05)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "Branch Mispredict",
			     br_mis * 100.);
		if (bad_spec > 0.1 && m_clears > 0.05)
			color = PERF_COLOR_RED;
		else
			color = NULL;
		print_metric(config, ctxp, color, "%8.1f%%", "Machine Clears",
			     m_clears * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_LAT) &&
		   full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
		double fe_bound = td_metric_ratio(cpu_map_idx,
						  STAT_TOPDOWN_FE_BOUND, st,
						  &rsd);
		double fetch_lat = td_metric_ratio(cpu_map_idx,
						   STAT_TOPDOWN_FETCH_LAT, st,
						   &rsd);
		double fetch_bw = fe_bound - fetch_lat;

		if (fe_bound > 0.2 && fetch_lat > 0.15)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "Fetch Latency",
			     fetch_lat * 100.);
		if (fe_bound > 0.2 && fetch_bw > 0.1)
			color = PERF_COLOR_RED;
		else
			color = NULL;
		print_metric(config, ctxp, color, "%8.1f%%", "Fetch Bandwidth",
			     fetch_bw * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_MEM_BOUND) &&
		   full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
		double be_bound = td_metric_ratio(cpu_map_idx,
						  STAT_TOPDOWN_BE_BOUND, st,
						  &rsd);
		double mem_bound = td_metric_ratio(cpu_map_idx,
						   STAT_TOPDOWN_MEM_BOUND, st,
						   &rsd);
		double core_bound = be_bound - mem_bound;

		if (be_bound > 0.2 && mem_bound > 0.2)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "Memory Bound",
			     mem_bound * 100.);
		if (be_bound > 0.2 && core_bound > 0.1)
			color = PERF_COLOR_RED;
		else
			color = NULL;
		print_metric(config, ctxp, color, "%8.1f%%", "Core Bound",
			     core_bound * 100.);
	} else if (evsel->metric_expr) {
		generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
			       evsel->name, evsel->metric_name, NULL, 1, cpu_map_idx, out, st);
	} else if (runtime_stat_n(st, STAT_NSECS, cpu_map_idx, &rsd) != 0) {
		char unit = ' ';
		char unit_buf[10] = "/sec";

		total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd);
		if (total)
			ratio = convert_unit_double(1000000000.0 * avg / total, &unit);

		if (unit != ' ')
			snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
		print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
		print_smi_cost(config, cpu_map_idx, out, st, &rsd);
	} else {
		num = 0;
	}

	if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
		struct metric_expr *mexp;

		list_for_each_entry(mexp, &me->head, nd) {
			if (num++ > 0)
				out->new_line(config, ctxp);
			generic_metric(config, mexp->metric_expr, mexp->metric_events,
				       mexp->metric_refs, evsel->name, mexp->metric_name,
				       mexp->metric_unit, mexp->runtime, cpu_map_idx, out, st);
		}
	}
	if (num == 0)
		print_metric(config, ctxp, NULL, NULL, NULL, 0);
}