// SPDX-License-Identifier: GPL-2.0
#include <math.h>
#include <stdio.h>
#include "evsel.h"
#include "stat.h"
#include "color.h"
#include "debug.h"
#include "pmu.h"
#include "rblist.h"
#include "evlist.h"
#include "expr.h"
#include "metricgroup.h"
#include "cgroup.h"
#include "units.h"
#include <linux/zalloc.h>
#include "iostat.h"

/*
 * AGGR_GLOBAL: Use CPU 0
 * AGGR_SOCKET: Use first CPU of socket
 * AGGR_DIE: Use first CPU of die
 * AGGR_CORE: Use first CPU of core
 * AGGR_NONE: Use matching CPU
 * AGGR_THREAD: Not supported?
 */
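/*
 * E.g. with AGGR_SOCKET, counts measured on every CPU of a socket are
 * accumulated under that socket's first CPU, so updates and lookups must
 * agree on the same representative cpu_map_idx.
 */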

struct runtime_stat rt_stat;
struct stats walltime_nsecs_stats;
struct rusage_stats ru_stats;

struct saved_value {
    struct rb_node rb_node;
    struct evsel *evsel;
    enum stat_type type;
    int ctx;
    int cpu_map_idx;
    struct cgroup *cgrp;
    struct runtime_stat *stat;
    struct stats stats;
    u64 metric_total;
    int metric_other;
};

static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
{
    struct saved_value *a = container_of(rb_node,
                         struct saved_value,
                         rb_node);
    const struct saved_value *b = entry;

    if (a->cpu_map_idx != b->cpu_map_idx)
        return a->cpu_map_idx - b->cpu_map_idx;

    /*
     * Previously the rbtree was used to link generic metrics.
     * The keys were evsel/cpu. Now the rbtree is extended to support
     * per-thread shadow stats. For the shadow stats case, the keys
     * are cpu/type/ctx/stat (evsel is NULL). For the generic metrics
     * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
     */
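    /*
     * Concretely, the two key shapes are:
     *   generic metric: { evsel, cpu_map_idx } (type/ctx are 0, stat is NULL)
     *   shadow stat:    { cpu_map_idx, type, ctx, stat } (evsel is NULL)
     */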
    if (a->type != b->type)
        return a->type - b->type;

    if (a->ctx != b->ctx)
        return a->ctx - b->ctx;

    if (a->cgrp != b->cgrp)
        return (char *)a->cgrp < (char *)b->cgrp ? -1 : +1;

    if (a->evsel == NULL && b->evsel == NULL) {
        if (a->stat == b->stat)
            return 0;

        if ((char *)a->stat < (char *)b->stat)
            return -1;

        return 1;
    }

    if (a->evsel == b->evsel)
        return 0;
    if ((char *)a->evsel < (char *)b->evsel)
        return -1;
    return +1;
}

static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
                     const void *entry)
{
    struct saved_value *nd = malloc(sizeof(struct saved_value));

    if (!nd)
        return NULL;
    memcpy(nd, entry, sizeof(struct saved_value));
    return &nd->rb_node;
}

static void saved_value_delete(struct rblist *rblist __maybe_unused,
                   struct rb_node *rb_node)
{
    struct saved_value *v;

    BUG_ON(!rb_node);
    v = container_of(rb_node, struct saved_value, rb_node);
    free(v);
}

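/*
 * Find the saved_value for the given key, optionally (@create) inserting
 * it first. Note that rblist__add_node() stores a malloc'ed copy of the
 * on-stack key (via saved_value_new()), hence the second rblist__find()
 * to return the tree-owned node.
 */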
static struct saved_value *saved_value_lookup(struct evsel *evsel,
                          int cpu_map_idx,
                          bool create,
                          enum stat_type type,
                          int ctx,
                          struct runtime_stat *st,
                          struct cgroup *cgrp)
{
    struct rblist *rblist;
    struct rb_node *nd;
    struct saved_value dm = {
        .cpu_map_idx = cpu_map_idx,
        .evsel = evsel,
        .type = type,
        .ctx = ctx,
        .stat = st,
        .cgrp = cgrp,
    };

    rblist = &st->value_list;

    /* don't use context info for clock events */
    if (type == STAT_NSECS)
        dm.ctx = 0;

    nd = rblist__find(rblist, &dm);
    if (nd)
        return container_of(nd, struct saved_value, rb_node);
    if (create) {
        rblist__add_node(rblist, &dm);
        nd = rblist__find(rblist, &dm);
        if (nd)
            return container_of(nd, struct saved_value, rb_node);
    }
    return NULL;
}

void runtime_stat__init(struct runtime_stat *st)
{
    struct rblist *rblist = &st->value_list;

    rblist__init(rblist);
    rblist->node_cmp = saved_value_cmp;
    rblist->node_new = saved_value_new;
    rblist->node_delete = saved_value_delete;
}

void runtime_stat__exit(struct runtime_stat *st)
{
    rblist__exit(&st->value_list);
}

void perf_stat__init_shadow_stats(void)
{
    runtime_stat__init(&rt_stat);
}

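/*
 * Fold the exclude_* attribute bits into a small integer so that events
 * counting in different contexts (kernel/user/hv/host/idle) do not share
 * shadow stats.
 */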
static int evsel_context(struct evsel *evsel)
{
    int ctx = 0;

    if (evsel->core.attr.exclude_kernel)
        ctx |= CTX_BIT_KERNEL;
    if (evsel->core.attr.exclude_user)
        ctx |= CTX_BIT_USER;
    if (evsel->core.attr.exclude_hv)
        ctx |= CTX_BIT_HV;
    if (evsel->core.attr.exclude_host)
        ctx |= CTX_BIT_HOST;
    if (evsel->core.attr.exclude_idle)
        ctx |= CTX_BIT_IDLE;

    return ctx;
}

static void reset_stat(struct runtime_stat *st)
{
    struct rblist *rblist;
    struct rb_node *pos, *next;

    rblist = &st->value_list;
    next = rb_first_cached(&rblist->entries);
    while (next) {
        pos = next;
        next = rb_next(pos);
        memset(&container_of(pos, struct saved_value, rb_node)->stats,
               0,
               sizeof(struct stats));
    }
}

void perf_stat__reset_shadow_stats(void)
{
    reset_stat(&rt_stat);
    memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
    memset(&ru_stats, 0, sizeof(ru_stats));
}

void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
{
    reset_stat(st);
}

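/*
 * Per-event lookup context (exclusion bits and cgroup) shared by the
 * update and print helpers below.
 */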
struct runtime_stat_data {
    int ctx;
    struct cgroup *cgrp;
};

static void update_runtime_stat(struct runtime_stat *st,
                enum stat_type type,
                int cpu_map_idx, u64 count,
                struct runtime_stat_data *rsd)
{
    struct saved_value *v = saved_value_lookup(NULL, cpu_map_idx, true, type,
                           rsd->ctx, st, rsd->cgrp);

    if (v)
        update_stats(&v->stats, count);
}

/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
                    int cpu_map_idx, struct runtime_stat *st)
{
    u64 count_ns = count;
    struct saved_value *v;
    struct runtime_stat_data rsd = {
        .ctx = evsel_context(counter),
        .cgrp = counter->cgrp,
    };

    count *= counter->scale;

    if (evsel__is_clock(counter))
        update_runtime_stat(st, STAT_NSECS, cpu_map_idx, count_ns, &rsd);
    else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
        update_runtime_stat(st, STAT_CYCLES, cpu_map_idx, count, &rsd);
    else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
        update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu_map_idx, count, &rsd);
    else if (perf_stat_evsel__is(counter, TRANSACTION_START))
        update_runtime_stat(st, STAT_TRANSACTION, cpu_map_idx, count, &rsd);
    else if (perf_stat_evsel__is(counter, ELISION_START))
        update_runtime_stat(st, STAT_ELISION, cpu_map_idx, count, &rsd);
    else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
        update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
                    cpu_map_idx, count, &rsd);
    else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
        update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
                    cpu_map_idx, count, &rsd);
    else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
        update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
                    cpu_map_idx, count, &rsd);
    else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
        update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
                    cpu_map_idx, count, &rsd);
    else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
        update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
                    cpu_map_idx, count, &rsd);
    else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING))
        update_runtime_stat(st, STAT_TOPDOWN_RETIRING,
                    cpu_map_idx, count, &rsd);
    else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC))
        update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC,
                    cpu_map_idx, count, &rsd);
    else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND))
        update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND,
                    cpu_map_idx, count, &rsd);
    else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND))
        update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND,
                    cpu_map_idx, count, &rsd);
    else if (perf_stat_evsel__is(counter, TOPDOWN_HEAVY_OPS))
        update_runtime_stat(st, STAT_TOPDOWN_HEAVY_OPS,
                    cpu_map_idx, count, &rsd);
    else if (perf_stat_evsel__is(counter, TOPDOWN_BR_MISPREDICT))
        update_runtime_stat(st, STAT_TOPDOWN_BR_MISPREDICT,
                    cpu_map_idx, count, &rsd);
    else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_LAT))
        update_runtime_stat(st, STAT_TOPDOWN_FETCH_LAT,
                    cpu_map_idx, count, &rsd);
    else if (perf_stat_evsel__is(counter, TOPDOWN_MEM_BOUND))
        update_runtime_stat(st, STAT_TOPDOWN_MEM_BOUND,
                    cpu_map_idx, count, &rsd);
    else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
        update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
                    cpu_map_idx, count, &rsd);
    else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
        update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
                    cpu_map_idx, count, &rsd);
    else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
        update_runtime_stat(st, STAT_BRANCHES, cpu_map_idx, count, &rsd);
    else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
        update_runtime_stat(st, STAT_CACHEREFS, cpu_map_idx, count, &rsd);
    else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
        update_runtime_stat(st, STAT_L1_DCACHE, cpu_map_idx, count, &rsd);
    else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
        update_runtime_stat(st, STAT_L1_ICACHE, cpu_map_idx, count, &rsd);
    else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL))
        update_runtime_stat(st, STAT_LL_CACHE, cpu_map_idx, count, &rsd);
    else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
        update_runtime_stat(st, STAT_DTLB_CACHE, cpu_map_idx, count, &rsd);
    else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
        update_runtime_stat(st, STAT_ITLB_CACHE, cpu_map_idx, count, &rsd);
    else if (perf_stat_evsel__is(counter, SMI_NUM))
        update_runtime_stat(st, STAT_SMI_NUM, cpu_map_idx, count, &rsd);
    else if (perf_stat_evsel__is(counter, APERF))
        update_runtime_stat(st, STAT_APERF, cpu_map_idx, count, &rsd);

    if (counter->collect_stat) {
        v = saved_value_lookup(counter, cpu_map_idx, true, STAT_NONE, 0, st,
                       rsd.cgrp);
        update_stats(&v->stats, count);
        if (counter->metric_leader)
            v->metric_total += count;
    } else if (counter->metric_leader) {
        v = saved_value_lookup(counter->metric_leader,
                       cpu_map_idx, true, STAT_NONE, 0, st, rsd.cgrp);
        v->metric_total += count;
        v->metric_other++;
    }
}

/* used for get_ratio_color() */
enum grc_type {
    GRC_STALLED_CYCLES_FE,
    GRC_STALLED_CYCLES_BE,
    GRC_CACHE_MISSES,
    GRC_MAX_NR
};

static const char *get_ratio_color(enum grc_type type, double ratio)
{
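    /*
     * Thresholds are percentages, one row per grc_type, ordered
     * { red, magenta, yellow }; ratios at or below the yellow
     * threshold keep the normal color.
     */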
    static const double grc_table[GRC_MAX_NR][3] = {
        [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
        [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
        [GRC_CACHE_MISSES]  = { 20.0, 10.0, 5.0 },
    };
    const char *color = PERF_COLOR_NORMAL;

    if (ratio > grc_table[type][0])
        color = PERF_COLOR_RED;
    else if (ratio > grc_table[type][1])
        color = PERF_COLOR_MAGENTA;
    else if (ratio > grc_table[type][2])
        color = PERF_COLOR_YELLOW;

    return color;
}

static struct evsel *perf_stat__find_event(struct evlist *evsel_list,
                        const char *name)
{
    struct evsel *c2;

    evlist__for_each_entry (evsel_list, c2) {
        if (!strcasecmp(c2->name, name) && !c2->collect_stat)
            return c2;
    }
    return NULL;
}

/* Mark the events referenced by each MetricExpr and link the events that use them to those targets. */
void perf_stat__collect_metric_expr(struct evlist *evsel_list)
{
    struct evsel *counter, *leader, **metric_events, *oc;
    bool found;
    struct expr_parse_ctx *ctx;
    struct hashmap_entry *cur;
    size_t bkt;
    int i;

    ctx = expr__ctx_new();
    if (!ctx) {
        pr_debug("expr__ctx_new failed");
        return;
    }
    evlist__for_each_entry(evsel_list, counter) {
        bool invalid = false;

        leader = evsel__leader(counter);
        if (!counter->metric_expr)
            continue;

        expr__ctx_clear(ctx);
        metric_events = counter->metric_events;
        if (!metric_events) {
            if (expr__find_ids(counter->metric_expr,
                       counter->name,
                       ctx) < 0)
                continue;

            metric_events = calloc(hashmap__size(ctx->ids) + 1,
                           sizeof(struct evsel *));
            if (!metric_events) {
                expr__ctx_free(ctx);
                return;
            }
            counter->metric_events = metric_events;
        }

        i = 0;
        hashmap__for_each_entry(ctx->ids, cur, bkt) {
            const char *metric_name = (const char *)cur->key;

            found = false;
            if (leader) {
                /* Search in group */
                for_each_group_member (oc, leader) {
                    if (!strcasecmp(oc->name,
                            metric_name) &&
                        !oc->collect_stat) {
                        found = true;
                        break;
                    }
                }
            }
            if (!found) {
                /* Search ignoring groups */
                oc = perf_stat__find_event(evsel_list,
                               metric_name);
            }
            if (!oc) {
                /* Deduping one is good enough to handle duplicated PMUs. */
                static char *printed;

                /*
                 * Adding events automatically would be difficult, because
                 * it would risk creating groups that are not schedulable.
                 * perf stat doesn't understand all the scheduling constraints
                 * of events. So we ask the user instead to add the missing
                 * events.
                 */
                if (!printed ||
                    strcasecmp(printed, metric_name)) {
                    fprintf(stderr,
                        "Add %s event to groups to get metric expression for %s\n",
                        metric_name,
                        counter->name);
                    free(printed);
                    printed = strdup(metric_name);
                }
                invalid = true;
                continue;
            }
            metric_events[i++] = oc;
            oc->collect_stat = true;
        }
        metric_events[i] = NULL;
        if (invalid) {
            free(metric_events);
            counter->metric_events = NULL;
            counter->metric_expr = NULL;
        }
    }
    expr__ctx_free(ctx);
}

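/*
 * Mean of the values saved for @type (0.0 when nothing was recorded),
 * and the number of samples behind that mean, respectively.
 */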
static double runtime_stat_avg(struct runtime_stat *st,
                   enum stat_type type, int cpu_map_idx,
                   struct runtime_stat_data *rsd)
{
    struct saved_value *v;

    v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp);
    if (!v)
        return 0.0;

    return avg_stats(&v->stats);
}

static double runtime_stat_n(struct runtime_stat *st,
                 enum stat_type type, int cpu_map_idx,
                 struct runtime_stat_data *rsd)
{
    struct saved_value *v;

    v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp);
    if (!v)
        return 0.0;

    return v->stats.n;
}

static void print_stalled_cycles_frontend(struct perf_stat_config *config,
                      int cpu_map_idx, double avg,
                      struct perf_stat_output_ctx *out,
                      struct runtime_stat *st,
                      struct runtime_stat_data *rsd)
{
    double total, ratio = 0.0;
    const char *color;

    total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);

    if (total)
        ratio = avg / total * 100.0;

    color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

    if (ratio)
        out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle",
                  ratio);
    else
        out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0);
}

static void print_stalled_cycles_backend(struct perf_stat_config *config,
                     int cpu_map_idx, double avg,
                     struct perf_stat_output_ctx *out,
                     struct runtime_stat *st,
                     struct runtime_stat_data *rsd)
{
    double total, ratio = 0.0;
    const char *color;

    total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);

    if (total)
        ratio = avg / total * 100.0;

    color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

    out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
}

static void print_branch_misses(struct perf_stat_config *config,
                int cpu_map_idx, double avg,
                struct perf_stat_output_ctx *out,
                struct runtime_stat *st,
                struct runtime_stat_data *rsd)
{
    double total, ratio = 0.0;
    const char *color;

    total = runtime_stat_avg(st, STAT_BRANCHES, cpu_map_idx, rsd);

    if (total)
        ratio = avg / total * 100.0;

    color = get_ratio_color(GRC_CACHE_MISSES, ratio);

    out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio);
}

static void print_l1_dcache_misses(struct perf_stat_config *config,
                   int cpu_map_idx, double avg,
                   struct perf_stat_output_ctx *out,
                   struct runtime_stat *st,
                   struct runtime_stat_data *rsd)
{
    double total, ratio = 0.0;
    const char *color;

    total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu_map_idx, rsd);

    if (total)
        ratio = avg / total * 100.0;

    color = get_ratio_color(GRC_CACHE_MISSES, ratio);

    out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache accesses", ratio);
}

static void print_l1_icache_misses(struct perf_stat_config *config,
                   int cpu_map_idx, double avg,
                   struct perf_stat_output_ctx *out,
                   struct runtime_stat *st,
                   struct runtime_stat_data *rsd)
{
    double total, ratio = 0.0;
    const char *color;

    total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu_map_idx, rsd);

    if (total)
        ratio = avg / total * 100.0;

    color = get_ratio_color(GRC_CACHE_MISSES, ratio);
    out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache accesses", ratio);
}

static void print_dtlb_cache_misses(struct perf_stat_config *config,
                    int cpu_map_idx, double avg,
                    struct perf_stat_output_ctx *out,
                    struct runtime_stat *st,
                    struct runtime_stat_data *rsd)
{
    double total, ratio = 0.0;
    const char *color;

    total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu_map_idx, rsd);

    if (total)
        ratio = avg / total * 100.0;

    color = get_ratio_color(GRC_CACHE_MISSES, ratio);
    out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache accesses", ratio);
}

static void print_itlb_cache_misses(struct perf_stat_config *config,
                    int cpu_map_idx, double avg,
                    struct perf_stat_output_ctx *out,
                    struct runtime_stat *st,
                    struct runtime_stat_data *rsd)
{
    double total, ratio = 0.0;
    const char *color;

    total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu_map_idx, rsd);

    if (total)
        ratio = avg / total * 100.0;

    color = get_ratio_color(GRC_CACHE_MISSES, ratio);
    out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache accesses", ratio);
}

static void print_ll_cache_misses(struct perf_stat_config *config,
                  int cpu_map_idx, double avg,
                  struct perf_stat_output_ctx *out,
                  struct runtime_stat *st,
                  struct runtime_stat_data *rsd)
{
    double total, ratio = 0.0;
    const char *color;

    total = runtime_stat_avg(st, STAT_LL_CACHE, cpu_map_idx, rsd);

    if (total)
        ratio = avg / total * 100.0;

    color = get_ratio_color(GRC_CACHE_MISSES, ratio);
    out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache accesses", ratio);
}

/*
 * High level "TopDown" CPU core pipeline bottleneck breakdown.
 *
 * Basic concept following
 * Yasin, "A Top-Down Method for Performance Analysis and Counters
 * Architecture", ISPASS 2014.
 *
 * The CPU pipeline is divided into 4 areas that can be bottlenecks:
 *
 * Frontend -> Backend -> Retiring, plus BadSpeculation.
 * BadSpeculation means out of order execution that is thrown away
 * (for example on branch mispredictions).
 * Frontend is instruction decoding.
 * Backend is execution, like computation and accessing data in memory.
 * Retiring is good execution that is not directly bottlenecked.
 *
 * The formulas are computed in slots.
 * A slot is one entry in the pipeline for each unit of the pipeline width
 * (for example a 4-wide pipeline has 4 slots for each cycle).
 *
 * Formulas:
 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
 *          TotalSlots
 * Retiring = SlotsRetired / TotalSlots
 * FrontendBound = FetchBubbles / TotalSlots
 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
 *
 * The kernel provides the mapping to the low level CPU events and any scaling
 * needed for the CPU pipeline width, for example:
 *
 * TotalSlots = Cycles * 4
 *
 * The scaling factor is communicated in the sysfs unit.
 *
 * In some cases the CPU may not be able to measure all the formulas due to
 * missing events. In that case multiple formulas are combined where possible.
 *
 * Full TopDown supports more levels to sub-divide each area: for example
 * BackendBound into compute bound and memory bound. For now we only
 * support Level 1 TopDown.
 */
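
/*
 * Worked example, with hypothetical numbers (not from any real CPU):
 * a 4-wide pipeline measured over 1000 cycles gives TotalSlots = 4000.
 * With SlotsIssued = 3000, SlotsRetired = 2400, RecoveryBubbles = 200 and
 * FetchBubbles = 400:
 *   BadSpeculation = (3000 - 2400 + 200) / 4000 = 0.20
 *   Retiring       = 2400 / 4000                = 0.60
 *   FrontendBound  = 400 / 4000                 = 0.10
 *   BackendBound   = 1.0 - 0.20 - 0.60 - 0.10   = 0.10
 */

/* Clamp small negative values (rounding error, down to -0.02) to zero. */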
static double sanitize_val(double x)
{
    if (x < 0 && x >= -0.02)
        return 0.0;
    return x;
}

static double td_total_slots(int cpu_map_idx, struct runtime_stat *st,
                 struct runtime_stat_data *rsd)
{
    return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu_map_idx, rsd);
}

static double td_bad_spec(int cpu_map_idx, struct runtime_stat *st,
              struct runtime_stat_data *rsd)
{
    double bad_spec = 0;
    double total_slots;
    double total;

    total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu_map_idx, rsd) -
        runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu_map_idx, rsd) +
        runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu_map_idx, rsd);

    total_slots = td_total_slots(cpu_map_idx, st, rsd);
    if (total_slots)
        bad_spec = total / total_slots;
    return sanitize_val(bad_spec);
}

static double td_retiring(int cpu_map_idx, struct runtime_stat *st,
              struct runtime_stat_data *rsd)
{
    double retiring = 0;
    double total_slots = td_total_slots(cpu_map_idx, st, rsd);
    double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
                        cpu_map_idx, rsd);

    if (total_slots)
        retiring = ret_slots / total_slots;
    return retiring;
}

static double td_fe_bound(int cpu_map_idx, struct runtime_stat *st,
              struct runtime_stat_data *rsd)
{
    double fe_bound = 0;
    double total_slots = td_total_slots(cpu_map_idx, st, rsd);
    double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
                        cpu_map_idx, rsd);

    if (total_slots)
        fe_bound = fetch_bub / total_slots;
    return fe_bound;
}

static double td_be_bound(int cpu_map_idx, struct runtime_stat *st,
              struct runtime_stat_data *rsd)
{
    double sum = (td_fe_bound(cpu_map_idx, st, rsd) +
              td_bad_spec(cpu_map_idx, st, rsd) +
              td_retiring(cpu_map_idx, st, rsd));
    if (sum == 0)
        return 0;
    return sanitize_val(1.0 - sum);
}

/*
 * The kernel reports metrics multiplied by slots. To get back
 * the ratios we need to recreate the sum.
 */

static double td_metric_ratio(int cpu_map_idx, enum stat_type type,
                  struct runtime_stat *stat,
                  struct runtime_stat_data *rsd)
{
    double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) +
        runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) +
        runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) +
        runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd);
    double d = runtime_stat_avg(stat, type, cpu_map_idx, rsd);

    if (sum)
        return d / sum;
    return 0;
}

/*
 * ... but only if most of the values are actually available.
 * We allow two missing.
 */

static bool full_td(int cpu_map_idx, struct runtime_stat *stat,
            struct runtime_stat_data *rsd)
{
    int c = 0;

    if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) > 0)
        c++;
    if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) > 0)
        c++;
    if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) > 0)
        c++;
    if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd) > 0)
        c++;
    return c >= 2;
}

static void print_smi_cost(struct perf_stat_config *config, int cpu_map_idx,
               struct perf_stat_output_ctx *out,
               struct runtime_stat *st,
               struct runtime_stat_data *rsd)
{
    double smi_num, aperf, cycles, cost = 0.0;
    const char *color = NULL;

    smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu_map_idx, rsd);
    aperf = runtime_stat_avg(st, STAT_APERF, cpu_map_idx, rsd);
    cycles = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd);

    if ((cycles == 0) || (aperf == 0))
        return;

    if (smi_num)
        cost = (aperf - cycles) / aperf * 100.00;

    if (cost > 10)
        color = PERF_COLOR_RED;
    out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
    out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}

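/*
 * Resolve each event in @metric_events to a value and add it (plus any
 * @metric_refs) to @pctx for expression evaluation. Returns the index of
 * the first event that could not be resolved (equal to the number of
 * events when all resolved), or a negative error code.
 */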
static int prepare_metric(struct evsel **metric_events,
              struct metric_ref *metric_refs,
              struct expr_parse_ctx *pctx,
              int cpu_map_idx,
              struct runtime_stat *st)
{
    double scale;
    char *n;
    int i, j, ret;

    for (i = 0; metric_events[i]; i++) {
        struct saved_value *v;
        struct stats *stats;
        u64 metric_total = 0;
        int source_count;

        if (evsel__is_tool(metric_events[i])) {
            source_count = 1;
            switch (metric_events[i]->tool_event) {
            case PERF_TOOL_DURATION_TIME:
                stats = &walltime_nsecs_stats;
                scale = 1e-9;
                break;
            case PERF_TOOL_USER_TIME:
                stats = &ru_stats.ru_utime_usec_stat;
                scale = 1e-6;
                break;
            case PERF_TOOL_SYSTEM_TIME:
                stats = &ru_stats.ru_stime_usec_stat;
                scale = 1e-6;
                break;
            case PERF_TOOL_NONE:
                pr_err("Invalid tool event 'none'");
                abort();
            case PERF_TOOL_MAX:
                pr_err("Invalid tool event 'max'");
                abort();
            default:
                pr_err("Unknown tool event '%s'", evsel__name(metric_events[i]));
                abort();
            }
        } else {
            v = saved_value_lookup(metric_events[i], cpu_map_idx, false,
                           STAT_NONE, 0, st,
                           metric_events[i]->cgrp);
            if (!v)
                break;
            stats = &v->stats;
            scale = 1.0;
            source_count = evsel__source_count(metric_events[i]);

            if (v->metric_other)
                metric_total = v->metric_total;
        }
        n = strdup(evsel__metric_id(metric_events[i]));
        if (!n)
            return -ENOMEM;

        expr__add_id_val_source_count(pctx, n,
                    metric_total ? : avg_stats(stats) * scale,
                    source_count);
    }

    for (j = 0; metric_refs && metric_refs[j].metric_name; j++) {
        ret = expr__add_ref(pctx, &metric_refs[j]);
        if (ret)
            return ret;
    }

    return i;
}

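/*
 * Evaluate @metric_expr against the values gathered by prepare_metric()
 * and print the result. When an event is missing or the expression fails
 * to parse, an empty metric line is printed instead.
 */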
static void generic_metric(struct perf_stat_config *config,
               const char *metric_expr,
               struct evsel **metric_events,
               struct metric_ref *metric_refs,
               char *name,
               const char *metric_name,
               const char *metric_unit,
               int runtime,
               int cpu_map_idx,
               struct perf_stat_output_ctx *out,
               struct runtime_stat *st)
{
    print_metric_t print_metric = out->print_metric;
    struct expr_parse_ctx *pctx;
    double ratio, scale;
    int i;
    void *ctxp = out->ctx;

    pctx = expr__ctx_new();
    if (!pctx)
        return;

    pctx->runtime = runtime;
    i = prepare_metric(metric_events, metric_refs, pctx, cpu_map_idx, st);
    if (i < 0) {
        expr__ctx_free(pctx);
        return;
    }
    if (!metric_events[i]) {
        if (expr__parse(&ratio, pctx, metric_expr) == 0) {
            char *unit;
            char metric_bf[64];

            if (metric_unit && metric_name) {
                if (perf_pmu__convert_scale(metric_unit,
                    &unit, &scale) >= 0) {
                    ratio *= scale;
                }
                if (strstr(metric_expr, "?"))
                    scnprintf(metric_bf, sizeof(metric_bf),
                      "%s  %s_%d", unit, metric_name, runtime);
                else
                    scnprintf(metric_bf, sizeof(metric_bf),
                      "%s  %s", unit, metric_name);

                print_metric(config, ctxp, NULL, "%8.1f",
                         metric_bf, ratio);
            } else {
                print_metric(config, ctxp, NULL, "%8.2f",
                    metric_name ?
                    metric_name :
                    out->force_header ? name : "",
                    ratio);
            }
        } else {
            print_metric(config, ctxp, NULL, NULL,
                     out->force_header ?
                     (metric_name ? metric_name : name) : "", 0);
        }
    } else {
        print_metric(config, ctxp, NULL, NULL,
                 out->force_header ?
                 (metric_name ? metric_name : name) : "", 0);
    }

    expr__ctx_free(pctx);
}

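/*
 * Evaluate @mexp (used for testing); returns NAN on allocation failure
 * and 0.0 when the expression cannot be evaluated.
 */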
double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct runtime_stat *st)
{
    struct expr_parse_ctx *pctx;
    double ratio = 0.0;

    pctx = expr__ctx_new();
    if (!pctx)
        return NAN;

    if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, cpu_map_idx, st) < 0)
        goto out;

    if (expr__parse(&ratio, pctx, mexp->metric_expr))
        ratio = 0.0;

out:
    expr__ctx_free(pctx);
    return ratio;
}

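/*
 * Print the derived ("shadow") metrics for @evsel: built-in ratios such
 * as IPC, miss rates and TopDown, falling back to a generic event rate
 * per second, plus any metrics attached via @metric_events.
 */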
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
                   struct evsel *evsel,
                   double avg, int cpu_map_idx,
                   struct perf_stat_output_ctx *out,
                   struct rblist *metric_events,
                   struct runtime_stat *st)
{
    void *ctxp = out->ctx;
    print_metric_t print_metric = out->print_metric;
    double total, ratio = 0.0, total2;
    const char *color = NULL;
    struct runtime_stat_data rsd = {
        .ctx = evsel_context(evsel),
        .cgrp = evsel->cgrp,
    };
    struct metric_event *me;
    int num = 1;

    if (config->iostat_run) {
        iostat_print_metric(config, evsel, out);
    } else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
        total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);

        if (total) {
            ratio = avg / total;
            print_metric(config, ctxp, NULL, "%7.2f ",
                    "insn per cycle", ratio);
        } else {
            print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
        }

        total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu_map_idx, &rsd);

        total = max(total, runtime_stat_avg(st,
                            STAT_STALLED_CYCLES_BACK,
                            cpu_map_idx, &rsd));

        if (total && avg) {
            out->new_line(config, ctxp);
            ratio = total / avg;
            print_metric(config, ctxp, NULL, "%7.2f ",
                    "stalled cycles per insn",
                    ratio);
        }
    } else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
        if (runtime_stat_n(st, STAT_BRANCHES, cpu_map_idx, &rsd) != 0)
            print_branch_misses(config, cpu_map_idx, avg, out, st, &rsd);
        else
            print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
    } else if (
        evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
        evsel->core.attr.config == (PERF_COUNT_HW_CACHE_L1D |
                    ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                    ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

        if (runtime_stat_n(st, STAT_L1_DCACHE, cpu_map_idx, &rsd) != 0)
            print_l1_dcache_misses(config, cpu_map_idx, avg, out, st, &rsd);
        else
            print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0);
    } else if (
        evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
        evsel->core.attr.config == (PERF_COUNT_HW_CACHE_L1I |
                    ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                    ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

        if (runtime_stat_n(st, STAT_L1_ICACHE, cpu_map_idx, &rsd) != 0)
            print_l1_icache_misses(config, cpu_map_idx, avg, out, st, &rsd);
        else
            print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0);
    } else if (
        evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
        evsel->core.attr.config == (PERF_COUNT_HW_CACHE_DTLB |
                    ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                    ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

        if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu_map_idx, &rsd) != 0)
            print_dtlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
        else
            print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0);
    } else if (
        evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
        evsel->core.attr.config == (PERF_COUNT_HW_CACHE_ITLB |
                    ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                    ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

        if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu_map_idx, &rsd) != 0)
            print_itlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
        else
            print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0);
    } else if (
        evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
        evsel->core.attr.config == (PERF_COUNT_HW_CACHE_LL |
                    ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
                    ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

        if (runtime_stat_n(st, STAT_LL_CACHE, cpu_map_idx, &rsd) != 0)
            print_ll_cache_misses(config, cpu_map_idx, avg, out, st, &rsd);
        else
            print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0);
    } else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
        total = runtime_stat_avg(st, STAT_CACHEREFS, cpu_map_idx, &rsd);

        if (total)
            ratio = avg * 100 / total;

        if (runtime_stat_n(st, STAT_CACHEREFS, cpu_map_idx, &rsd) != 0)
            print_metric(config, ctxp, NULL, "%8.3f %%",
                     "of all cache refs", ratio);
        else
            print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
    } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
        print_stalled_cycles_frontend(config, cpu_map_idx, avg, out, st, &rsd);
    } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
        print_stalled_cycles_backend(config, cpu_map_idx, avg, out, st, &rsd);
    } else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
        total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd);

        if (total) {
            ratio = avg / total;
            print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio);
        } else {
            print_metric(config, ctxp, NULL, NULL, "GHz", 0);
        }
    } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
        total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);

        if (total)
            print_metric(config, ctxp, NULL,
                    "%7.2f%%", "transactional cycles",
                    100.0 * (avg / total));
        else
            print_metric(config, ctxp, NULL, NULL, "transactional cycles",
                     0);
    } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
        total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd);
        total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);

        if (total2 < avg)
            total2 = avg;
        if (total)
            print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles",
                100.0 * ((total2-avg) / total));
        else
            print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
    } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
        total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);

        if (avg)
            ratio = total / avg;

        if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd) != 0)
            print_metric(config, ctxp, NULL, "%8.0f",
                     "cycles / transaction", ratio);
        else
            print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
                      0);
    } else if (perf_stat_evsel__is(evsel, ELISION_START)) {
        total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd);

        if (avg)
            ratio = total / avg;

        print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio);
    } else if (evsel__is_clock(evsel)) {
        if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
            print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
                     avg / (ratio * evsel->scale));
        else
            print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
    } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
        double fe_bound = td_fe_bound(cpu_map_idx, st, &rsd);

        if (fe_bound > 0.2)
            color = PERF_COLOR_RED;
        print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
                fe_bound * 100.);
    } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
        double retiring = td_retiring(cpu_map_idx, st, &rsd);

        if (retiring > 0.7)
            color = PERF_COLOR_GREEN;
        print_metric(config, ctxp, color, "%8.1f%%", "retiring",
                retiring * 100.);
    } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
        double bad_spec = td_bad_spec(cpu_map_idx, st, &rsd);

        if (bad_spec > 0.1)
            color = PERF_COLOR_RED;
        print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
                bad_spec * 100.);
    } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
        double be_bound = td_be_bound(cpu_map_idx, st, &rsd);
        const char *name = "backend bound";
        static int have_recovery_bubbles = -1;

        /* In case the CPU does not support topdown-recovery-bubbles */
        if (have_recovery_bubbles < 0)
            have_recovery_bubbles = pmu_have_event("cpu",
                    "topdown-recovery-bubbles");
        if (!have_recovery_bubbles)
            name = "backend bound/bad spec";

        if (be_bound > 0.2)
            color = PERF_COLOR_RED;
        if (td_total_slots(cpu_map_idx, st, &rsd) > 0)
            print_metric(config, ctxp, color, "%8.1f%%", name,
                    be_bound * 100.);
        else
            print_metric(config, ctxp, NULL, NULL, name, 0);
    } else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) &&
           full_td(cpu_map_idx, st, &rsd)) {
        double retiring = td_metric_ratio(cpu_map_idx,
                          STAT_TOPDOWN_RETIRING, st,
                          &rsd);
        if (retiring > 0.7)
            color = PERF_COLOR_GREEN;
        print_metric(config, ctxp, color, "%8.1f%%", "Retiring",
                retiring * 100.);
    } else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) &&
           full_td(cpu_map_idx, st, &rsd)) {
        double fe_bound = td_metric_ratio(cpu_map_idx,
                          STAT_TOPDOWN_FE_BOUND, st,
                          &rsd);
        if (fe_bound > 0.2)
            color = PERF_COLOR_RED;
        print_metric(config, ctxp, color, "%8.1f%%", "Frontend Bound",
                fe_bound * 100.);
    } else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) &&
           full_td(cpu_map_idx, st, &rsd)) {
        double be_bound = td_metric_ratio(cpu_map_idx,
                          STAT_TOPDOWN_BE_BOUND, st,
                          &rsd);
        if (be_bound > 0.2)
            color = PERF_COLOR_RED;
        print_metric(config, ctxp, color, "%8.1f%%", "Backend Bound",
                be_bound * 100.);
    } else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) &&
           full_td(cpu_map_idx, st, &rsd)) {
        double bad_spec = td_metric_ratio(cpu_map_idx,
                          STAT_TOPDOWN_BAD_SPEC, st,
                          &rsd);
        if (bad_spec > 0.1)
            color = PERF_COLOR_RED;
        print_metric(config, ctxp, color, "%8.1f%%", "Bad Speculation",
                bad_spec * 100.);
    } else if (perf_stat_evsel__is(evsel, TOPDOWN_HEAVY_OPS) &&
            full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
        double retiring = td_metric_ratio(cpu_map_idx,
                          STAT_TOPDOWN_RETIRING, st,
                          &rsd);
        double heavy_ops = td_metric_ratio(cpu_map_idx,
                           STAT_TOPDOWN_HEAVY_OPS, st,
                           &rsd);
        double light_ops = retiring - heavy_ops;

        if (retiring > 0.7 && heavy_ops > 0.1)
            color = PERF_COLOR_GREEN;
        print_metric(config, ctxp, color, "%8.1f%%", "Heavy Operations",
                heavy_ops * 100.);
        if (retiring > 0.7 && light_ops > 0.6)
            color = PERF_COLOR_GREEN;
        else
            color = NULL;
        print_metric(config, ctxp, color, "%8.1f%%", "Light Operations",
                light_ops * 100.);
    } else if (perf_stat_evsel__is(evsel, TOPDOWN_BR_MISPREDICT) &&
            full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
        double bad_spec = td_metric_ratio(cpu_map_idx,
                          STAT_TOPDOWN_BAD_SPEC, st,
                          &rsd);
        double br_mis = td_metric_ratio(cpu_map_idx,
                        STAT_TOPDOWN_BR_MISPREDICT, st,
                        &rsd);
        double m_clears = bad_spec - br_mis;

        if (bad_spec > 0.1 && br_mis > 0.05)
            color = PERF_COLOR_RED;
        print_metric(config, ctxp, color, "%8.1f%%", "Branch Mispredict",
                br_mis * 100.);
        if (bad_spec > 0.1 && m_clears > 0.05)
            color = PERF_COLOR_RED;
        else
            color = NULL;
        print_metric(config, ctxp, color, "%8.1f%%", "Machine Clears",
                m_clears * 100.);
    } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_LAT) &&
            full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
        double fe_bound = td_metric_ratio(cpu_map_idx,
                          STAT_TOPDOWN_FE_BOUND, st,
                          &rsd);
        double fetch_lat = td_metric_ratio(cpu_map_idx,
                           STAT_TOPDOWN_FETCH_LAT, st,
                           &rsd);
        double fetch_bw = fe_bound - fetch_lat;

        if (fe_bound > 0.2 && fetch_lat > 0.15)
            color = PERF_COLOR_RED;
        print_metric(config, ctxp, color, "%8.1f%%", "Fetch Latency",
                fetch_lat * 100.);
        if (fe_bound > 0.2 && fetch_bw > 0.1)
            color = PERF_COLOR_RED;
        else
            color = NULL;
        print_metric(config, ctxp, color, "%8.1f%%", "Fetch Bandwidth",
                fetch_bw * 100.);
    } else if (perf_stat_evsel__is(evsel, TOPDOWN_MEM_BOUND) &&
            full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
        double be_bound = td_metric_ratio(cpu_map_idx,
                          STAT_TOPDOWN_BE_BOUND, st,
                          &rsd);
        double mem_bound = td_metric_ratio(cpu_map_idx,
                           STAT_TOPDOWN_MEM_BOUND, st,
                           &rsd);
        double core_bound = be_bound - mem_bound;

        if (be_bound > 0.2 && mem_bound > 0.2)
            color = PERF_COLOR_RED;
        print_metric(config, ctxp, color, "%8.1f%%", "Memory Bound",
                mem_bound * 100.);
        if (be_bound > 0.2 && core_bound > 0.1)
            color = PERF_COLOR_RED;
        else
            color = NULL;
        print_metric(config, ctxp, color, "%8.1f%%", "Core Bound",
                core_bound * 100.);
    } else if (evsel->metric_expr) {
        generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
                evsel->name, evsel->metric_name, NULL, 1, cpu_map_idx, out, st);
    } else if (runtime_stat_n(st, STAT_NSECS, cpu_map_idx, &rsd) != 0) {
        char unit = ' ';
        char unit_buf[10] = "/sec";

        total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd);
        if (total)
            ratio = convert_unit_double(1000000000.0 * avg / total, &unit);

        if (unit != ' ')
            snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
        print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
    } else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
        print_smi_cost(config, cpu_map_idx, out, st, &rsd);
    } else {
        num = 0;
    }

    if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
        struct metric_expr *mexp;

        list_for_each_entry (mexp, &me->head, nd) {
            if (num++ > 0)
                out->new_line(config, ctxp);
            generic_metric(config, mexp->metric_expr, mexp->metric_events,
                    mexp->metric_refs, evsel->name, mexp->metric_name,
                    mexp->metric_unit, mexp->runtime, cpu_map_idx, out, st);
        }
    }
    if (num == 0)
        print_metric(config, ctxp, NULL, NULL, NULL, 0);
}