Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 #include <errno.h>
0003 #include <inttypes.h>
0004 #include "builtin.h"
0005 #include "perf.h"
0006 
0007 #include "util/evlist.h" // for struct evsel_str_handler
0008 #include "util/evsel.h"
0009 #include "util/symbol.h"
0010 #include "util/thread.h"
0011 #include "util/header.h"
0012 #include "util/target.h"
0013 #include "util/callchain.h"
0014 #include "util/lock-contention.h"
0015 
0016 #include <subcmd/pager.h>
0017 #include <subcmd/parse-options.h>
0018 #include "util/trace-event.h"
0019 #include "util/tracepoint.h"
0020 
0021 #include "util/debug.h"
0022 #include "util/session.h"
0023 #include "util/tool.h"
0024 #include "util/data.h"
0025 #include "util/string2.h"
0026 #include "util/map.h"
0027 
0028 #include <sys/types.h>
0029 #include <sys/prctl.h>
0030 #include <semaphore.h>
0031 #include <pthread.h>
0032 #include <math.h>
0033 #include <limits.h>
0034 
0035 #include <linux/list.h>
0036 #include <linux/hash.h>
0037 #include <linux/kernel.h>
0038 #include <linux/zalloc.h>
0039 #include <linux/err.h>
0040 #include <linux/stringify.h>
0041 
/* session/target of the perf.data (or live) trace being analyzed */
static struct perf_session *session;
static struct target target;

/* based on kernel/lockdep.c */
#define LOCKHASH_BITS		12
#define LOCKHASH_SIZE		(1UL << LOCKHASH_BITS)

/* hash table of struct lock_stat, keyed by the aggregation key (see aggr_mode) */
static struct hlist_head lockhash_table[LOCKHASH_SIZE];

#define __lockhashfn(key)	hash_long((unsigned long)key, LOCKHASH_BITS)
#define lockhashentry(key)	(lockhash_table + __lockhashfn((key)))

/* rb-tree of struct thread_stat ordered by tid */
static struct rb_root		thread_stats;

static bool combine_locks;	/* merge stats of locks sharing the same name */
static bool show_thread_stats;	/* aggregate per task instead of per lock */
static bool use_bpf;		/* NOTE(review): presumably enables BPF collection — set outside this chunk */
static unsigned long bpf_map_entries = 10240;

/* what the lock_stat hash key means: lock address, task tid, or callchain id */
static enum {
	LOCK_AGGR_ADDR,
	LOCK_AGGR_TASK,
	LOCK_AGGR_CALLER,
} aggr_mode = LOCK_AGGR_ADDR;

/*
 * Kernel text boundaries of the sched and lock text sections, resolved
 * lazily in is_lock_function().  sched_text_start == 1 marks a failed
 * symbol lookup so it is not retried.
 */
static u64 sched_text_start;
static u64 sched_text_end;
static u64 lock_text_start;
static u64 lock_text_end;
0072 static struct thread_stat *thread_stat_find(u32 tid)
0073 {
0074     struct rb_node *node;
0075     struct thread_stat *st;
0076 
0077     node = thread_stats.rb_node;
0078     while (node) {
0079         st = container_of(node, struct thread_stat, rb);
0080         if (st->tid == tid)
0081             return st;
0082         else if (tid < st->tid)
0083             node = node->rb_left;
0084         else
0085             node = node->rb_right;
0086     }
0087 
0088     return NULL;
0089 }
0090 
0091 static void thread_stat_insert(struct thread_stat *new)
0092 {
0093     struct rb_node **rb = &thread_stats.rb_node;
0094     struct rb_node *parent = NULL;
0095     struct thread_stat *p;
0096 
0097     while (*rb) {
0098         p = container_of(*rb, struct thread_stat, rb);
0099         parent = *rb;
0100 
0101         if (new->tid < p->tid)
0102             rb = &(*rb)->rb_left;
0103         else if (new->tid > p->tid)
0104             rb = &(*rb)->rb_right;
0105         else
0106             BUG_ON("inserting invalid thread_stat\n");
0107     }
0108 
0109     rb_link_node(&new->rb, parent, rb);
0110     rb_insert_color(&new->rb, &thread_stats);
0111 }
0112 
0113 static struct thread_stat *thread_stat_findnew_after_first(u32 tid)
0114 {
0115     struct thread_stat *st;
0116 
0117     st = thread_stat_find(tid);
0118     if (st)
0119         return st;
0120 
0121     st = zalloc(sizeof(struct thread_stat));
0122     if (!st) {
0123         pr_err("memory allocation failed\n");
0124         return NULL;
0125     }
0126 
0127     st->tid = tid;
0128     INIT_LIST_HEAD(&st->seq_list);
0129 
0130     thread_stat_insert(st);
0131 
0132     return st;
0133 }
0134 
static struct thread_stat *thread_stat_findnew_first(u32 tid);
/*
 * Dispatch pointer: starts as the "first call" variant (no lookup needed
 * on an empty tree) and is switched to the lookup-first variant after the
 * first insertion.
 */
static struct thread_stat *(*thread_stat_findnew)(u32 tid) =
	thread_stat_findnew_first;
0138 
0139 static struct thread_stat *thread_stat_findnew_first(u32 tid)
0140 {
0141     struct thread_stat *st;
0142 
0143     st = zalloc(sizeof(struct thread_stat));
0144     if (!st) {
0145         pr_err("memory allocation failed\n");
0146         return NULL;
0147     }
0148     st->tid = tid;
0149     INIT_LIST_HEAD(&st->seq_list);
0150 
0151     rb_link_node(&st->rb, NULL, &thread_stats.rb_node);
0152     rb_insert_color(&st->rb, &thread_stats);
0153 
0154     thread_stat_findnew = thread_stat_findnew_after_first;
0155     return st;
0156 }
0157 
/*
 * SINGLE_KEY(m) defines lock_stat_key_m(): a sort predicate returning
 * non-zero when one->m is bigger than two->m.
 */
#define SINGLE_KEY(member)						\
	static int lock_stat_key_ ## member(struct lock_stat *one,	\
					 struct lock_stat *two)		\
	{								\
		return one->member > two->member;			\
	}

SINGLE_KEY(nr_acquired)
SINGLE_KEY(nr_contended)
SINGLE_KEY(avg_wait_time)
SINGLE_KEY(wait_time_total)
SINGLE_KEY(wait_time_max)
0171 
0172 static int lock_stat_key_wait_time_min(struct lock_stat *one,
0173                     struct lock_stat *two)
0174 {
0175     u64 s1 = one->wait_time_min;
0176     u64 s2 = two->wait_time_min;
0177     if (s1 == ULLONG_MAX)
0178         s1 = 0;
0179     if (s2 == ULLONG_MAX)
0180         s2 = 0;
0181     return s1 > s2;
0182 }
0183 
/* Describes one sortable/printable column of the lock statistics output. */
struct lock_key {
	/*
	 * name: the value for specify by user
	 * this should be simpler than raw name of member
	 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
	 */
	const char		*name;
	/* header: the string printed on the header line */
	const char		*header;
	/* len: the printing width of the field */
	int			len;
	/* key: a pointer to function to compare two lock stats for sorting */
	int			(*key)(struct lock_stat*, struct lock_stat*);
	/* print: a pointer to function to print a given lock stats */
	void			(*print)(struct lock_key*, struct lock_stat*);
	/* list: list entry to link this into lock_keys when selected */
	struct list_head	list;
};
0202 
0203 static void lock_stat_key_print_time(unsigned long long nsec, int len)
0204 {
0205     static const struct {
0206         float base;
0207         const char *unit;
0208     } table[] = {
0209         { 1e9 * 3600, "h " },
0210         { 1e9 * 60, "m " },
0211         { 1e9, "s " },
0212         { 1e6, "ms" },
0213         { 1e3, "us" },
0214         { 0, NULL },
0215     };
0216 
0217     for (int i = 0; table[i].unit; i++) {
0218         if (nsec < table[i].base)
0219             continue;
0220 
0221         pr_info("%*.2f %s", len - 3, nsec / table[i].base, table[i].unit);
0222         return;
0223     }
0224 
0225     pr_info("%*llu %s", len - 3, nsec, "ns");
0226 }
0227 
/* PRINT_KEY(m): print ls->m as a plain integer, right-aligned to key->len */
#define PRINT_KEY(member)						\
static void lock_stat_key_print_ ## member(struct lock_key *key,	\
					   struct lock_stat *ls)	\
{									\
	pr_info("%*llu", key->len, (unsigned long long)ls->member);	\
}

/* PRINT_TIME(m): print ls->m as a human-readable time via lock_stat_key_print_time() */
#define PRINT_TIME(member)						\
static void lock_stat_key_print_ ## member(struct lock_key *key,	\
					   struct lock_stat *ls)	\
{									\
	lock_stat_key_print_time((unsigned long long)ls->member, key->len); \
}

PRINT_KEY(nr_acquired)
PRINT_KEY(nr_contended)
PRINT_TIME(avg_wait_time)
PRINT_TIME(wait_time_total)
PRINT_TIME(wait_time_max)
0247 
0248 static void lock_stat_key_print_wait_time_min(struct lock_key *key,
0249                           struct lock_stat *ls)
0250 {
0251     u64 wait_time = ls->wait_time_min;
0252 
0253     if (wait_time == ULLONG_MAX)
0254         wait_time = 0;
0255 
0256     lock_stat_key_print_time(wait_time, key->len);
0257 }
0258 
0259 
/* name of the column used for sorting; must match a lock_key name */
static const char		*sort_key = "acquired";

/* comparison function selected by select_key() from the key tables */
static int			(*compare)(struct lock_stat *, struct lock_stat *);

static struct rb_root		sorted; /* place to store intermediate data */
static struct rb_root		result; /* place to store sorted data */

/* lock_key entries selected for output, linked through lock_key->list */
static LIST_HEAD(lock_keys);
static const char		*output_fields;
0269 
/* build a lock_key initializer from the field name and function suffix */
#define DEF_KEY_LOCK(name, header, fn_suffix, len)			\
	{ #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} }
/* columns available for the 'report' mode; terminated by an empty entry */
static struct lock_key report_keys[] = {
	DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10),
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),

	/* extra comparisons much complicated should be here */
	{ }
};
0283 
/* columns available for the 'contention' mode; terminated by an empty entry */
static struct lock_key contention_keys[] = {
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),

	/* extra comparisons much complicated should be here */
	{ }
};
0294 
0295 static int select_key(bool contention)
0296 {
0297     int i;
0298     struct lock_key *keys = report_keys;
0299 
0300     if (contention)
0301         keys = contention_keys;
0302 
0303     for (i = 0; keys[i].name; i++) {
0304         if (!strcmp(keys[i].name, sort_key)) {
0305             compare = keys[i].key;
0306 
0307             /* selected key should be in the output fields */
0308             if (list_empty(&keys[i].list))
0309                 list_add_tail(&keys[i].list, &lock_keys);
0310 
0311             return 0;
0312         }
0313     }
0314 
0315     pr_err("Unknown compare key: %s\n", sort_key);
0316     return -1;
0317 }
0318 
0319 static int add_output_field(bool contention, char *name)
0320 {
0321     int i;
0322     struct lock_key *keys = report_keys;
0323 
0324     if (contention)
0325         keys = contention_keys;
0326 
0327     for (i = 0; keys[i].name; i++) {
0328         if (strcmp(keys[i].name, name))
0329             continue;
0330 
0331         /* prevent double link */
0332         if (list_empty(&keys[i].list))
0333             list_add_tail(&keys[i].list, &lock_keys);
0334 
0335         return 0;
0336     }
0337 
0338     pr_err("Unknown output field: %s\n", name);
0339     return -1;
0340 }
0341 
0342 static int setup_output_field(bool contention, const char *str)
0343 {
0344     char *tok, *tmp, *orig;
0345     int i, ret = 0;
0346     struct lock_key *keys = report_keys;
0347 
0348     if (contention)
0349         keys = contention_keys;
0350 
0351     /* no output field given: use all of them */
0352     if (str == NULL) {
0353         for (i = 0; keys[i].name; i++)
0354             list_add_tail(&keys[i].list, &lock_keys);
0355         return 0;
0356     }
0357 
0358     for (i = 0; keys[i].name; i++)
0359         INIT_LIST_HEAD(&keys[i].list);
0360 
0361     orig = tmp = strdup(str);
0362     if (orig == NULL)
0363         return -ENOMEM;
0364 
0365     while ((tok = strsep(&tmp, ",")) != NULL){
0366         ret = add_output_field(contention, tok);
0367         if (ret < 0)
0368             break;
0369     }
0370     free(orig);
0371 
0372     return ret;
0373 }
0374 
/*
 * Merge @st into the 'sorted' rb-tree keyed by lock name.  If an entry
 * with the same name already exists, fold counters into it and mark @st
 * combined so insert_to_result() skips it; otherwise insert @st itself.
 */
static void combine_lock_stats(struct lock_stat *st)
{
	struct rb_node **rb = &sorted.rb_node;
	struct rb_node *parent = NULL;
	struct lock_stat *p;
	int ret;

	while (*rb) {
		p = container_of(*rb, struct lock_stat, rb);
		parent = *rb;

		if (st->name && p->name)
			ret = strcmp(st->name, p->name);
		else
			/* named entries order after unnamed ones */
			ret = !!st->name - !!p->name;

		if (ret == 0) {
			/* same name: accumulate into the existing node */
			p->nr_acquired += st->nr_acquired;
			p->nr_contended += st->nr_contended;
			p->wait_time_total += st->wait_time_total;

			if (p->nr_contended)
				p->avg_wait_time = p->wait_time_total / p->nr_contended;

			if (p->wait_time_min > st->wait_time_min)
				p->wait_time_min = st->wait_time_min;
			if (p->wait_time_max < st->wait_time_max)
				p->wait_time_max = st->wait_time_max;

			p->broken |= st->broken;
			st->combined = 1;
			return;
		}

		if (ret < 0)
			rb = &(*rb)->rb_left;
		else
			rb = &(*rb)->rb_right;
	}

	rb_link_node(&st->rb, parent, rb);
	rb_insert_color(&st->rb, &sorted);
}
0418 
0419 static void insert_to_result(struct lock_stat *st,
0420                  int (*bigger)(struct lock_stat *, struct lock_stat *))
0421 {
0422     struct rb_node **rb = &result.rb_node;
0423     struct rb_node *parent = NULL;
0424     struct lock_stat *p;
0425 
0426     if (combine_locks && st->combined)
0427         return;
0428 
0429     while (*rb) {
0430         p = container_of(*rb, struct lock_stat, rb);
0431         parent = *rb;
0432 
0433         if (bigger(st, p))
0434             rb = &(*rb)->rb_left;
0435         else
0436             rb = &(*rb)->rb_right;
0437     }
0438 
0439     rb_link_node(&st->rb, parent, rb);
0440     rb_insert_color(&st->rb, &result);
0441 }
0442 
0443 /* returns left most element of result, and erase it */
0444 static struct lock_stat *pop_from_result(void)
0445 {
0446     struct rb_node *node = result.rb_node;
0447 
0448     if (!node)
0449         return NULL;
0450 
0451     while (node->rb_left)
0452         node = node->rb_left;
0453 
0454     rb_erase(node, &result);
0455     return container_of(node, struct lock_stat, rb);
0456 }
0457 
0458 static struct lock_stat *lock_stat_find(u64 addr)
0459 {
0460     struct hlist_head *entry = lockhashentry(addr);
0461     struct lock_stat *ret;
0462 
0463     hlist_for_each_entry(ret, entry, hash_entry) {
0464         if (ret->addr == addr)
0465             return ret;
0466     }
0467     return NULL;
0468 }
0469 
0470 static struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
0471 {
0472     struct hlist_head *entry = lockhashentry(addr);
0473     struct lock_stat *ret, *new;
0474 
0475     hlist_for_each_entry(ret, entry, hash_entry) {
0476         if (ret->addr == addr)
0477             return ret;
0478     }
0479 
0480     new = zalloc(sizeof(struct lock_stat));
0481     if (!new)
0482         goto alloc_failed;
0483 
0484     new->addr = addr;
0485     new->name = strdup(name);
0486     if (!new->name) {
0487         free(new);
0488         goto alloc_failed;
0489     }
0490 
0491     new->flags = flags;
0492     new->wait_time_min = ULLONG_MAX;
0493 
0494     hlist_add_head(&new->hash_entry, entry);
0495     return new;
0496 
0497 alloc_failed:
0498     pr_err("memory allocation failed\n");
0499     return NULL;
0500 }
0501 
/* Per-tracepoint callbacks; which set is used depends on the kernel config. */
struct trace_lock_handler {
	/* it's used on CONFIG_LOCKDEP */
	int (*acquire_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
	int (*acquired_event)(struct evsel *evsel,
			      struct perf_sample *sample);

	/* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
	int (*contended_event)(struct evsel *evsel,
			       struct perf_sample *sample);

	/* it's used on CONFIG_LOCKDEP */
	int (*release_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* it's used when CONFIG_LOCKDEP is off */
	int (*contention_begin_event)(struct evsel *evsel,
				      struct perf_sample *sample);

	/* it's used when CONFIG_LOCKDEP is off */
	int (*contention_end_event)(struct evsel *evsel,
				    struct perf_sample *sample);
};
0527 
0528 static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr)
0529 {
0530     struct lock_seq_stat *seq;
0531 
0532     list_for_each_entry(seq, &ts->seq_list, list) {
0533         if (seq->addr == addr)
0534             return seq;
0535     }
0536 
0537     seq = zalloc(sizeof(struct lock_seq_stat));
0538     if (!seq) {
0539         pr_err("memory allocation failed\n");
0540         return NULL;
0541     }
0542     seq->state = SEQ_STATE_UNINITIALIZED;
0543     seq->addr = addr;
0544 
0545     list_add(&seq->list, &ts->seq_list);
0546     return seq;
0547 }
0548 
/* categories of broken lock event sequences, counted in bad_hist[] */
enum broken_state {
	BROKEN_ACQUIRE,
	BROKEN_ACQUIRED,
	BROKEN_CONTENDED,
	BROKEN_RELEASE,
	BROKEN_MAX,
};

/* per-category counts of broken sequences seen while processing events */
static int bad_hist[BROKEN_MAX];

/* bits of the acquire event's "flags" field */
enum acquire_flags {
	TRY_LOCK = 1,
	READ_LOCK = 2,
};
0563 
/*
 * Handle a lock acquire tracepoint: advance the per-thread lock sequence
 * state machine and update the lock's acquire/trylock/readlock counters.
 * Returns 0 (also for broken sequences, which are only counted), or a
 * negative errno on allocation failure / bad aggregation mode.
 */
static int report_lock_acquire_event(struct evsel *evsel,
				     struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	int flag = evsel__intval(evsel, sample, "flags");
	u64 key;

	/* pick the hash key; caller aggregation is not supported here */
	switch (aggr_mode) {
	case LOCK_AGGR_ADDR:
		key = addr;
		break;
	case LOCK_AGGR_TASK:
		key = sample->tid;
		break;
	case LOCK_AGGR_CALLER:
	default:
		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
		return -EINVAL;
	}

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_RELEASED:
		if (!flag) {
			/* plain write acquire: wait for the acquired event */
			seq->state = SEQ_STATE_ACQUIRING;
		} else {
			/* try/read locks count as acquired immediately */
			if (flag & TRY_LOCK)
				ls->nr_trylock++;
			if (flag & READ_LOCK)
				ls->nr_readlock++;
			seq->state = SEQ_STATE_READ_ACQUIRED;
			seq->read_count = 1;
			ls->nr_acquired++;
		}
		break;
	case SEQ_STATE_READ_ACQUIRED:
		if (flag & READ_LOCK) {
			/* nested read acquire is legal */
			seq->read_count++;
			ls->nr_acquired++;
			goto end;
		} else {
			goto broken;
		}
		break;
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_CONTENDED:
broken:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRE]++;
		}
		/* drop the tracker; a fresh one starts on the next event */
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	ls->nr_acquire++;
	seq->prev_event_time = sample->time;
end:
	return 0;
}
0646 
/*
 * Handle a lock acquired tracepoint: if the sequence was contended,
 * account the wait time (total/min/max/avg) before moving the state to
 * SEQ_STATE_ACQUIRED.  Orphan events (uninitialized sequence) are
 * silently ignored.
 */
static int report_lock_acquired_event(struct evsel *evsel,
				      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 contended_term;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;

	switch (aggr_mode) {
	case LOCK_AGGR_ADDR:
		key = addr;
		break;
	case LOCK_AGGR_TASK:
		key = sample->tid;
		break;
	case LOCK_AGGR_CALLER:
	default:
		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
		return -EINVAL;
	}

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event, do nothing */
		return 0;
	case SEQ_STATE_ACQUIRING:
		break;
	case SEQ_STATE_CONTENDED:
		/* wait ended: time since the contended event is the wait */
		contended_term = sample->time - seq->prev_event_time;
		ls->wait_time_total += contended_term;
		if (contended_term < ls->wait_time_min)
			ls->wait_time_min = contended_term;
		if (ls->wait_time_max < contended_term)
			ls->wait_time_max = contended_term;
		break;
	case SEQ_STATE_RELEASED:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_ACQUIRED;
	ls->nr_acquired++;
	ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
	seq->prev_event_time = sample->time;
end:
	return 0;
}
0720 
/*
 * Handle a lock contended tracepoint: move the sequence from ACQUIRING
 * to CONTENDED and bump the contention counter.  Only ACQUIRING is a
 * legal predecessor; every other initialized state is a broken sequence.
 */
static int report_lock_contended_event(struct evsel *evsel,
				       struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;

	switch (aggr_mode) {
	case LOCK_AGGR_ADDR:
		key = addr;
		break;
	case LOCK_AGGR_TASK:
		key = sample->tid;
		break;
	case LOCK_AGGR_CALLER:
	default:
		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
		return -EINVAL;
	}

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event, do nothing */
		return 0;
	case SEQ_STATE_ACQUIRING:
		break;
	case SEQ_STATE_RELEASED:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_CONTENDED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_CONTENDED;
	ls->nr_contended++;
	/* nr_contended was just incremented, so the division is safe */
	ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
	seq->prev_event_time = sample->time;
end:
	return 0;
}
0786 
/*
 * Handle a lock release tracepoint: count the release, unwind nested
 * read acquisitions, and free the sequence tracker once the lock is
 * fully released (or the sequence turns out to be broken).
 */
static int report_lock_release_event(struct evsel *evsel,
				     struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;

	switch (aggr_mode) {
	case LOCK_AGGR_ADDR:
		key = addr;
		break;
	case LOCK_AGGR_TASK:
		key = sample->tid;
		break;
	case LOCK_AGGR_CALLER:
	default:
		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
		return -EINVAL;
	}

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event: keep the (fresh) tracker, nothing to do */
		goto end;
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_READ_ACQUIRED:
		seq->read_count--;
		BUG_ON(seq->read_count < 0);
		if (seq->read_count) {
			/* still held by other readers: count but keep seq */
			ls->nr_release++;
			goto end;
		}
		break;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_CONTENDED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_RELEASE]++;
		}
		goto free_seq;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	ls->nr_release++;
free_seq:
	list_del_init(&seq->list);
	free(seq);
end:
	return 0;
}
0856 
/*
 * Return true when @addr falls inside the kernel's sched or lock text
 * sections (where mutex/rwsem and spinlock functions live).  The section
 * boundaries are resolved lazily on first call; a failed lookup of
 * __sched_text_start sets the sentinel value 1 so it is not retried.
 */
bool is_lock_function(struct machine *machine, u64 addr)
{
	if (!sched_text_start) {
		struct map *kmap;
		struct symbol *sym;

		sym = machine__find_kernel_symbol_by_name(machine,
							  "__sched_text_start",
							  &kmap);
		if (!sym) {
			/* to avoid retry */
			sched_text_start = 1;
			return false;
		}

		sched_text_start = kmap->unmap_ip(kmap, sym->start);

		/* should not fail from here */
		/* NOTE(review): the following lookups dereference sym unchecked */
		sym = machine__find_kernel_symbol_by_name(machine,
							  "__sched_text_end",
							  &kmap);
		sched_text_end = kmap->unmap_ip(kmap, sym->start);

		sym = machine__find_kernel_symbol_by_name(machine,
							  "__lock_text_start",
							  &kmap);
		lock_text_start = kmap->unmap_ip(kmap, sym->start);

		sym = machine__find_kernel_symbol_by_name(machine,
							  "__lock_text_end",
							  &kmap);
		lock_text_end = kmap->unmap_ip(kmap, sym->start);
	}

	/* failed to get kernel symbols */
	if (sched_text_start == 1)
		return false;

	/* mutex and rwsem functions are in sched text section */
	if (sched_text_start <= addr && addr < sched_text_end)
		return true;

	/* spinlock functions are in lock text section */
	if (lock_text_start <= addr && addr < lock_text_end)
		return true;

	return false;
}
0905 
0906 static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample,
0907                   char *buf, int size)
0908 {
0909     struct thread *thread;
0910     struct callchain_cursor *cursor = &callchain_cursor;
0911     struct machine *machine = &session->machines.host;
0912     struct symbol *sym;
0913     int skip = 0;
0914     int ret;
0915 
0916     /* lock names will be replaced to task name later */
0917     if (show_thread_stats)
0918         return -1;
0919 
0920     thread = machine__findnew_thread(machine, -1, sample->pid);
0921     if (thread == NULL)
0922         return -1;
0923 
0924     /* use caller function name from the callchain */
0925     ret = thread__resolve_callchain(thread, cursor, evsel, sample,
0926                     NULL, NULL, CONTENTION_STACK_DEPTH);
0927     if (ret != 0) {
0928         thread__put(thread);
0929         return -1;
0930     }
0931 
0932     callchain_cursor_commit(cursor);
0933     thread__put(thread);
0934 
0935     while (true) {
0936         struct callchain_cursor_node *node;
0937 
0938         node = callchain_cursor_current(cursor);
0939         if (node == NULL)
0940             break;
0941 
0942         /* skip first few entries - for lock functions */
0943         if (++skip <= CONTENTION_STACK_SKIP)
0944             goto next;
0945 
0946         sym = node->ms.sym;
0947         if (sym && !is_lock_function(machine, node->ip)) {
0948             struct map *map = node->ms.map;
0949             u64 offset;
0950 
0951             offset = map->map_ip(map, node->ip) - sym->start;
0952 
0953             if (offset)
0954                 scnprintf(buf, size, "%s+%#lx", sym->name, offset);
0955             else
0956                 strlcpy(buf, sym->name, size);
0957             return 0;
0958         }
0959 
0960 next:
0961         callchain_cursor_advance(cursor);
0962     }
0963     return -1;
0964 }
0965 
/*
 * Compute a hash identifying the sample's callchain (used as the
 * lock_stat key in LOCK_AGGR_CALLER mode).  Lock/sched-section frames
 * and the first CONTENTION_STACK_SKIP entries are excluded so the id
 * reflects the actual caller.  Returns (u64)-1 on resolution failure.
 */
static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample)
{
	struct callchain_cursor *cursor = &callchain_cursor;
	struct machine *machine = &session->machines.host;
	struct thread *thread;
	u64 hash = 0;
	int skip = 0;
	int ret;

	thread = machine__findnew_thread(machine, -1, sample->pid);
	if (thread == NULL)
		return -1;

	/* use caller function name from the callchain */
	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
					NULL, NULL, CONTENTION_STACK_DEPTH);
	thread__put(thread);

	if (ret != 0)
		return -1;

	callchain_cursor_commit(cursor);

	while (true) {
		struct callchain_cursor_node *node;

		node = callchain_cursor_current(cursor);
		if (node == NULL)
			break;

		/* skip first few entries - for lock functions */
		if (++skip <= CONTENTION_STACK_SKIP)
			goto next;

		if (node->ms.sym && is_lock_function(machine, node->ip))
			goto next;

		/* fold each instruction pointer into the hash */
		hash ^= hash_long((unsigned long)node->ip, 64);

next:
		callchain_cursor_advance(cursor);
	}
	return hash;
}
1010 
/*
 * Handle a contention_begin tracepoint (lockdep-less mode): create the
 * lock_stat on first sight (named after the resolved caller), then move
 * the per-thread sequence to SEQ_STATE_CONTENDED and count the
 * contention.  A nested begin while already contended is ignored.
 */
static int report_lock_contention_begin_event(struct evsel *evsel,
					      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 addr = evsel__intval(evsel, sample, "lock_addr");
	u64 key;

	switch (aggr_mode) {
	case LOCK_AGGR_ADDR:
		key = addr;
		break;
	case LOCK_AGGR_TASK:
		key = sample->tid;
		break;
	case LOCK_AGGR_CALLER:
		key = callchain_id(evsel, sample);
		break;
	default:
		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
		return -EINVAL;
	}

	ls = lock_stat_find(key);
	if (!ls) {
		char buf[128];
		const char *caller = buf;
		unsigned int flags = evsel__intval(evsel, sample, "flags");

		/* name the lock after its caller, or "Unknown" */
		if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0)
			caller = "Unknown";

		ls = lock_stat_findnew(key, caller, flags);
		if (!ls)
			return -ENOMEM;
	}

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_CONTENDED:
		/*
		 * It can have nested contention begin with mutex spinning,
		 * then we would use the original contention begin event and
		 * ignore the second one.
		 */
		goto end;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	/*
	 * NOTE(review): every CONTENDED path above jumps to end, so this
	 * condition is always true when reached.
	 */
	if (seq->state != SEQ_STATE_CONTENDED) {
		seq->state = SEQ_STATE_CONTENDED;
		seq->prev_event_time = sample->time;
		ls->nr_contended++;
	}
end:
	return 0;
}
1092 
/*
 * Handle a lock:contention_end sample.
 *
 * Pairs with report_lock_contention_begin_event(): if the per-thread
 * sequence for this lock address is in the CONTENDED state, accumulate
 * the elapsed wait time (total/min/max) into the lock_stat bucket and
 * transition to ACQUIRED.  Returns 0 on success or a negative errno.
 */
static int report_lock_contention_end_event(struct evsel *evsel,
					    struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 contended_term;
	u64 addr = evsel__intval(evsel, sample, "lock_addr");
	u64 key;

	/* aggregation key must match the one used by the begin handler */
	switch (aggr_mode) {
	case LOCK_AGGR_ADDR:
		key = addr;
		break;
	case LOCK_AGGR_TASK:
		key = sample->tid;
		break;
	case LOCK_AGGR_CALLER:
		key = callchain_id(evsel, sample);
		break;
	default:
		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
		return -EINVAL;
	}

	/* no begin event was seen for this key: nothing to account */
	ls = lock_stat_find(key);
	if (!ls)
		return 0;

	ts = thread_stat_find(sample->tid);
	if (!ts)
		return 0;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		goto end;
	case SEQ_STATE_CONTENDED:
		/* wait time = end timestamp - begin timestamp */
		contended_term = sample->time - seq->prev_event_time;
		ls->wait_time_total += contended_term;
		if (contended_term < ls->wait_time_min)
			ls->wait_time_min = contended_term;
		if (ls->wait_time_max < contended_term)
			ls->wait_time_max = contended_term;
		break;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence: count it once per lock and restart */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	/* nr_acquired is incremented first, so the division below is safe */
	seq->state = SEQ_STATE_ACQUIRED;
	ls->nr_acquired++;
	ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired;
end:
	return 0;
}
1164 
/* lock oriented handlers */
/* TODO: handlers for CPU oriented, thread oriented */
/* full handler set used by 'perf lock report' (lockdep tracepoints + contention) */
static struct trace_lock_handler report_lock_ops  = {
	.acquire_event		= report_lock_acquire_event,
	.acquired_event		= report_lock_acquired_event,
	.contended_event	= report_lock_contended_event,
	.release_event		= report_lock_release_event,
	.contention_begin_event = report_lock_contention_begin_event,
	.contention_end_event	= report_lock_contention_end_event,
};

/* reduced handler set used by 'perf lock contention' (begin/end only) */
static struct trace_lock_handler contention_lock_ops  = {
	.contention_begin_event = report_lock_contention_begin_event,
	.contention_end_event	= report_lock_contention_end_event,
};


/* selected in cmd_lock() according to the subcommand being run */
static struct trace_lock_handler *trace_handler;
1183 
1184 static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample)
1185 {
1186     if (trace_handler->acquire_event)
1187         return trace_handler->acquire_event(evsel, sample);
1188     return 0;
1189 }
1190 
1191 static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample)
1192 {
1193     if (trace_handler->acquired_event)
1194         return trace_handler->acquired_event(evsel, sample);
1195     return 0;
1196 }
1197 
1198 static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample)
1199 {
1200     if (trace_handler->contended_event)
1201         return trace_handler->contended_event(evsel, sample);
1202     return 0;
1203 }
1204 
1205 static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample)
1206 {
1207     if (trace_handler->release_event)
1208         return trace_handler->release_event(evsel, sample);
1209     return 0;
1210 }
1211 
1212 static int evsel__process_contention_begin(struct evsel *evsel, struct perf_sample *sample)
1213 {
1214     if (trace_handler->contention_begin_event)
1215         return trace_handler->contention_begin_event(evsel, sample);
1216     return 0;
1217 }
1218 
1219 static int evsel__process_contention_end(struct evsel *evsel, struct perf_sample *sample)
1220 {
1221     if (trace_handler->contention_end_event)
1222         return trace_handler->contention_end_event(evsel, sample);
1223     return 0;
1224 }
1225 
1226 static void print_bad_events(int bad, int total)
1227 {
1228     /* Output for debug, this have to be removed */
1229     int i;
1230     int broken = 0;
1231     const char *name[4] =
1232         { "acquire", "acquired", "contended", "release" };
1233 
1234     for (i = 0; i < BROKEN_MAX; i++)
1235         broken += bad_hist[i];
1236 
1237     if (broken == 0 && !verbose)
1238         return;
1239 
1240     pr_info("\n=== output for debug===\n\n");
1241     pr_info("bad: %d, total: %d\n", bad, total);
1242     pr_info("bad rate: %.2f %%\n", (double)bad / (double)total * 100);
1243     pr_info("histogram of events caused bad sequence\n");
1244     for (i = 0; i < BROKEN_MAX; i++)
1245         pr_info(" %10s: %d\n", name[i], bad_hist[i]);
1246 }
1247 
/* TODO: various way to print, coloring, nano or milli sec */
/*
 * Print the sorted 'perf lock report' table: one row per lock (or per
 * thread with -t), with the columns selected via -F/--field.
 */
static void print_result(void)
{
	struct lock_stat *st;
	struct lock_key *key;
	char cut_name[20];
	int bad, total;

	/* header row from the selected output keys */
	pr_info("%20s ", "Name");
	list_for_each_entry(key, &lock_keys, list)
		pr_info("%*s ", key->len, key->header);
	pr_info("\n\n");

	bad = total = 0;
	while ((st = pop_from_result())) {
		total++;
		if (st->broken)
			bad++;
		/* rows with no completed acquisition carry no useful stats */
		if (!st->nr_acquired)
			continue;

		bzero(cut_name, 20);

		if (strlen(st->name) < 20) {
			/* output raw name */
			const char *name = st->name;

			if (show_thread_stats) {
				struct thread *t;

				/* st->addr contains tid of thread */
				t = perf_session__findnew(session, st->addr);
				name = thread__comm_str(t);
				/*
				 * NOTE(review): 't' is not thread__put() here,
				 * unlike dump_threads() — looks like a refcount
				 * leak; confirm against thread__comm_str()
				 * lifetime before changing.
				 */
			}

			pr_info("%20s ", name);
		} else {
			/* truncate long names to 16 chars + "..." */
			strncpy(cut_name, st->name, 16);
			cut_name[16] = '.';
			cut_name[17] = '.';
			cut_name[18] = '.';
			cut_name[19] = '\0';
			/* cut off name for saving output style */
			pr_info("%20s ", cut_name);
		}

		list_for_each_entry(key, &lock_keys, list) {
			key->print(key, st);
			pr_info(" ");
		}
		pr_info("\n");
	}

	print_bad_events(bad, total);
}
1303 
/* set by 'perf lock info' options: -t dumps threads, -m dumps the lock map */
static bool info_threads, info_map;
1305 
/*
 * 'perf lock info -t': list every thread seen in the data file with its
 * tid and comm, walking the thread_stats rbtree in tid order.
 */
static void dump_threads(void)
{
	struct thread_stat *st;
	struct rb_node *node;
	struct thread *t;

	pr_info("%10s: comm\n", "Thread ID");

	node = rb_first(&thread_stats);
	while (node) {
		st = container_of(node, struct thread_stat, rb);
		t = perf_session__findnew(session, st->tid);
		pr_info("%10d: %s\n", st->tid, thread__comm_str(t));
		node = rb_next(node);
		/* drop the reference taken by perf_session__findnew() */
		thread__put(t);
	}
}
1323 
1324 static int compare_maps(struct lock_stat *a, struct lock_stat *b)
1325 {
1326     int ret;
1327 
1328     if (a->name && b->name)
1329         ret = strcmp(a->name, b->name);
1330     else
1331         ret = !!a->name - !!b->name;
1332 
1333     if (!ret)
1334         return a->addr < b->addr;
1335     else
1336         return ret < 0;
1337 }
1338 
/*
 * 'perf lock info -m': print the address -> class-name table of every
 * lock instance, sorted via compare_maps().
 */
static void dump_map(void)
{
	unsigned int i;
	struct lock_stat *st;

	pr_info("Address of instance: name of class\n");
	/* collect all hash buckets into the sorted result tree */
	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			insert_to_result(st, compare_maps);
		}
	}

	while ((st = pop_from_result()))
		pr_info(" %#llx: %s\n", (unsigned long long)st->addr, st->name);
}
1354 
1355 static int dump_info(void)
1356 {
1357     int rc = 0;
1358 
1359     if (info_threads)
1360         dump_threads();
1361     else if (info_map)
1362         dump_map();
1363     else {
1364         rc = -1;
1365         pr_err("Unknown type of information\n");
1366     }
1367 
1368     return rc;
1369 }
1370 
/* signature shared by all evsel__process_* tracepoint dispatchers above */
typedef int (*tracepoint_handler)(struct evsel *evsel,
				  struct perf_sample *sample);

/*
 * perf_tool sample callback: route each sample to the handler installed
 * on its evsel (set up via perf_session__set_tracepoints_handlers()).
 */
static int process_sample_event(struct perf_tool *tool __maybe_unused,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	int err = 0;
	struct thread *thread = machine__findnew_thread(machine, sample->pid,
							sample->tid);

	if (thread == NULL) {
		pr_debug("problem processing %d event, skipping it.\n",
			event->header.type);
		return -1;
	}

	if (evsel->handler != NULL) {
		tracepoint_handler f = evsel->handler;
		err = f(evsel, sample);
	}

	/* balance the reference taken by machine__findnew_thread() */
	thread__put(thread);

	return err;
}
1399 
/*
 * With -c/--combine-locks, merge the stats of locks belonging to the
 * same class before sorting; no-op otherwise.
 */
static void combine_result(void)
{
	unsigned int i;
	struct lock_stat *st;

	if (!combine_locks)
		return;

	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			combine_lock_stats(st);
		}
	}
}
1414 
/*
 * Move every lock_stat from the hash table into the result tree, ordered
 * by the user-selected sort key (the global 'compare' function).
 */
static void sort_result(void)
{
	unsigned int i;
	struct lock_stat *st;

	for (i = 0; i < LOCKHASH_SIZE; i++) {
		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
			insert_to_result(st, compare);
		}
	}
}
1426 
1427 static const char *get_type_str(struct lock_stat *st)
1428 {
1429     static const struct {
1430         unsigned int flags;
1431         const char *name;
1432     } table[] = {
1433         { 0,                "semaphore" },
1434         { LCB_F_SPIN,           "spinlock" },
1435         { LCB_F_SPIN | LCB_F_READ,  "rwlock:R" },
1436         { LCB_F_SPIN | LCB_F_WRITE, "rwlock:W"},
1437         { LCB_F_READ,           "rwsem:R" },
1438         { LCB_F_WRITE,          "rwsem:W" },
1439         { LCB_F_RT,         "rtmutex" },
1440         { LCB_F_RT | LCB_F_READ,    "rwlock-rt:R" },
1441         { LCB_F_RT | LCB_F_WRITE,   "rwlock-rt:W"},
1442         { LCB_F_PERCPU | LCB_F_READ,    "pcpu-sem:R" },
1443         { LCB_F_PERCPU | LCB_F_WRITE,   "pcpu-sem:W" },
1444         { LCB_F_MUTEX,          "mutex" },
1445         { LCB_F_MUTEX | LCB_F_SPIN, "mutex" },
1446     };
1447 
1448     for (unsigned int i = 0; i < ARRAY_SIZE(table); i++) {
1449         if (table[i].flags == st->flags)
1450             return table[i].name;
1451     }
1452     return "unknown";
1453 }
1454 
/* contention output uses the same key-based ordering as the report */
static void sort_contention_result(void)
{
	sort_result();
}
1459 
/*
 * Print the sorted 'perf lock contention' table: selected stat columns
 * followed by either pid/comm (-t) or lock type/caller.
 */
static void print_contention_result(void)
{
	struct lock_stat *st;
	struct lock_key *key;
	int bad, total;

	list_for_each_entry(key, &lock_keys, list)
		pr_info("%*s ", key->len, key->header);

	if (show_thread_stats)
		pr_info("  %10s   %s\n\n", "pid", "comm");
	else
		pr_info("  %10s   %s\n\n", "type", "caller");

	bad = total = 0;
	/* in BPF mode, lost map entries were folded into BROKEN_CONTENDED */
	if (use_bpf)
		bad = bad_hist[BROKEN_CONTENDED];

	while ((st = pop_from_result())) {
		/* BPF aggregates per lock, so weight by contention count */
		total += use_bpf ? st->nr_contended : 1;
		if (st->broken)
			bad++;

		list_for_each_entry(key, &lock_keys, list) {
			key->print(key, st);
			pr_info(" ");
		}

		if (show_thread_stats) {
			struct thread *t;
			int pid = st->addr;

			/* st->addr contains tid of thread */
			t = perf_session__findnew(session, pid);
			pr_info("  %10d   %s\n", pid, thread__comm_str(t));
			/*
			 * NOTE(review): 't' is not thread__put() here, unlike
			 * dump_threads() — looks like a refcount leak; confirm
			 * against thread__comm_str() lifetime.
			 */
			continue;
		}

		pr_info("  %10s   %s\n", get_type_str(st), st->name);
	}

	print_bad_events(bad, total);
}
1503 
/* legacy lockdep/lock_stat tracepoints consumed by 'perf lock report' */
static const struct evsel_str_handler lock_tracepoints[] = {
	{ "lock:lock_acquire",	 evsel__process_lock_acquire,   }, /* CONFIG_LOCKDEP */
	{ "lock:lock_acquired",  evsel__process_lock_acquired,  }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_release",	 evsel__process_lock_release,   }, /* CONFIG_LOCKDEP */
};

/* lightweight contention tracepoints consumed by 'perf lock contention' */
static const struct evsel_str_handler contention_tracepoints[] = {
	{ "lock:contention_begin", evsel__process_contention_begin, },
	{ "lock:contention_end",   evsel__process_contention_end,   },
};

/* -f/--force: open the data file even if ownership checks would refuse it */
static bool force;
1517 
/*
 * Common driver for 'perf lock report' and 'perf lock info' (selected by
 * display_info): open the perf.data file, install tracepoint handlers,
 * process all events and print either the info dump or the sorted stats.
 * Returns 0 on success or a negative error.
 */
static int __cmd_report(bool display_info)
{
	int err = -EINVAL;
	struct perf_tool eops = {
		.sample		 = process_sample_event,
		.comm		 = perf_event__process_comm,
		.mmap		 = perf_event__process_mmap,
		.namespaces	 = perf_event__process_namespaces,
		.ordered_events	 = true,
	};
	struct perf_data data = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = force,
	};

	session = perf_session__new(&data, &eops);
	if (IS_ERR(session)) {
		pr_err("Initializing perf session failed\n");
		return PTR_ERR(session);
	}

	/* for lock function check */
	symbol_conf.sort_by_name = true;
	symbol__init(&session->header.env);

	if (!perf_session__has_traces(session, "lock record"))
		goto out_delete;

	/* legacy lockdep tracepoints, if present in the file */
	if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
		pr_err("Initializing perf session tracepoint handlers failed\n");
		goto out_delete;
	}

	/* contention tracepoints, also handled by report_lock_ops */
	if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
		pr_err("Initializing perf session tracepoint handlers failed\n");
		goto out_delete;
	}

	if (setup_output_field(false, output_fields))
		goto out_delete;

	if (select_key(false))
		goto out_delete;

	if (show_thread_stats)
		aggr_mode = LOCK_AGGR_TASK;

	err = perf_session__process_events(session);
	if (err)
		goto out_delete;

	setup_pager();
	if (display_info) /* used for info subcommand */
		err = dump_info();
	else {
		combine_result();
		sort_result();
		print_result();
	}

out_delete:
	perf_session__delete(session);
	return err;
}
1583 
/* empty handler: its only job is to interrupt the pause() in __cmd_contention() */
static void sighandler(int sig __maybe_unused)
{
}
1587 
/*
 * Driver for 'perf lock contention'.
 *
 * Two modes:
 *  - BPF mode (-b): attach the lock-contention BPF skeleton, optionally
 *    run a workload from argv, and read aggregated stats from BPF maps
 *    when interrupted;
 *  - data-file mode: replay contention tracepoints from perf.data.
 * Either way the results end up in lockhash_table and are printed sorted.
 */
static int __cmd_contention(int argc, const char **argv)
{
	int err = -EINVAL;
	struct perf_tool eops = {
		.sample		 = process_sample_event,
		.comm		 = perf_event__process_comm,
		.mmap		 = perf_event__process_mmap,
		.ordered_events	 = true,
	};
	struct perf_data data = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = force,
	};
	struct lock_contention con = {
		.target = &target,
		.result = &lockhash_table[0],
		.map_nr_entries = bpf_map_entries,
	};

	/* BPF mode collects live, so no data file is opened */
	session = perf_session__new(use_bpf ? NULL : &data, &eops);
	if (IS_ERR(session)) {
		pr_err("Initializing perf session failed\n");
		return PTR_ERR(session);
	}

	/* for lock function check */
	symbol_conf.sort_by_name = true;
	symbol__init(&session->header.env);

	if (use_bpf) {
		err = target__validate(&target);
		if (err) {
			char errbuf[512];

			target__strerror(&target, err, errbuf, 512);
			pr_err("%s\n", errbuf);
			goto out_delete;
		}

		/* let these signals break out of the pause() below */
		signal(SIGINT, sighandler);
		signal(SIGCHLD, sighandler);
		signal(SIGTERM, sighandler);

		con.machine = &session->machines.host;

		con.evlist = evlist__new();
		if (con.evlist == NULL) {
			err = -ENOMEM;
			goto out_delete;
		}

		err = evlist__create_maps(con.evlist, &target);
		if (err < 0)
			goto out_delete;

		/* remaining argv is an optional workload to profile */
		if (argc) {
			err = evlist__prepare_workload(con.evlist, &target,
						       argv, false, NULL);
			if (err < 0)
				goto out_delete;
		}

		if (lock_contention_prepare(&con) < 0) {
			pr_err("lock contention BPF setup failed\n");
			goto out_delete;
		}
	} else {
		if (!perf_session__has_traces(session, "lock record"))
			goto out_delete;

		/* the file must have been recorded with contention tracepoints */
		if (!evlist__find_evsel_by_str(session->evlist,
					       "lock:contention_begin")) {
			pr_err("lock contention evsel not found\n");
			goto out_delete;
		}

		if (perf_session__set_tracepoints_handlers(session,
						contention_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}
	}

	if (setup_output_field(true, output_fields))
		goto out_delete;

	if (select_key(true))
		goto out_delete;

	if (show_thread_stats)
		aggr_mode = LOCK_AGGR_TASK;
	else
		aggr_mode = LOCK_AGGR_CALLER;

	if (use_bpf) {
		lock_contention_start();
		if (argc)
			evlist__start_workload(con.evlist);

		/* wait for signal */
		pause();

		lock_contention_stop();
		lock_contention_read(&con);

		/* abuse bad hist stats for lost entries */
		bad_hist[BROKEN_CONTENDED] = con.lost;
	} else {
		err = perf_session__process_events(session);
		if (err)
			goto out_delete;
	}

	setup_pager();

	sort_contention_result();
	print_contention_result();

out_delete:
	/* evlist__delete(NULL) and lock_contention_finish() are safe no-ops */
	evlist__delete(con.evlist);
	lock_contention_finish();
	perf_session__delete(session);
	return err;
}
1713 
1714 
1715 static int __cmd_record(int argc, const char **argv)
1716 {
1717     const char *record_args[] = {
1718         "record", "-R", "-m", "1024", "-c", "1", "--synth", "task",
1719     };
1720     const char *callgraph_args[] = {
1721         "--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH),
1722     };
1723     unsigned int rec_argc, i, j, ret;
1724     unsigned int nr_tracepoints;
1725     unsigned int nr_callgraph_args = 0;
1726     const char **rec_argv;
1727     bool has_lock_stat = true;
1728 
1729     for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
1730         if (!is_valid_tracepoint(lock_tracepoints[i].name)) {
1731             pr_debug("tracepoint %s is not enabled. "
1732                  "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
1733                  lock_tracepoints[i].name);
1734             has_lock_stat = false;
1735             break;
1736         }
1737     }
1738 
1739     if (has_lock_stat)
1740         goto setup_args;
1741 
1742     for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) {
1743         if (!is_valid_tracepoint(contention_tracepoints[i].name)) {
1744             pr_err("tracepoint %s is not enabled.\n",
1745                    contention_tracepoints[i].name);
1746             return 1;
1747         }
1748     }
1749 
1750     nr_callgraph_args = ARRAY_SIZE(callgraph_args);
1751 
1752 setup_args:
1753     rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1;
1754 
1755     if (has_lock_stat)
1756         nr_tracepoints = ARRAY_SIZE(lock_tracepoints);
1757     else
1758         nr_tracepoints = ARRAY_SIZE(contention_tracepoints);
1759 
1760     /* factor of 2 is for -e in front of each tracepoint */
1761     rec_argc += 2 * nr_tracepoints;
1762 
1763     rec_argv = calloc(rec_argc + 1, sizeof(char *));
1764     if (!rec_argv)
1765         return -ENOMEM;
1766 
1767     for (i = 0; i < ARRAY_SIZE(record_args); i++)
1768         rec_argv[i] = strdup(record_args[i]);
1769 
1770     for (j = 0; j < nr_tracepoints; j++) {
1771         const char *ev_name;
1772 
1773         if (has_lock_stat)
1774             ev_name = strdup(lock_tracepoints[j].name);
1775         else
1776             ev_name = strdup(contention_tracepoints[j].name);
1777 
1778         if (!ev_name)
1779             return -ENOMEM;
1780 
1781         rec_argv[i++] = "-e";
1782         rec_argv[i++] = ev_name;
1783     }
1784 
1785     for (j = 0; j < nr_callgraph_args; j++, i++)
1786         rec_argv[i] = callgraph_args[j];
1787 
1788     for (j = 1; j < (unsigned int)argc; j++, i++)
1789         rec_argv[i] = argv[j];
1790 
1791     BUG_ON(i != rec_argc);
1792 
1793     ret = cmd_record(i, rec_argv);
1794     free(rec_argv);
1795     return ret;
1796 }
1797 
1798 static int parse_map_entry(const struct option *opt, const char *str,
1799                 int unset __maybe_unused)
1800 {
1801     unsigned long *len = (unsigned long *)opt->value;
1802     unsigned long val;
1803     char *endptr;
1804 
1805     errno = 0;
1806     val = strtoul(str, &endptr, 0);
1807     if (*endptr != '\0' || errno != 0) {
1808         pr_err("invalid BPF map length: %s\n", str);
1809         return -1;
1810     }
1811 
1812     *len = val;
1813     return 0;
1814 }
1815 
/*
 * Entry point for 'perf lock': parse the common options, dispatch to the
 * record/report/script/info/contention subcommands (abbreviations of at
 * least 3 chars are accepted for record/report/contention).
 */
int cmd_lock(int argc, const char **argv)
{
	/* options shared by every subcommand via OPT_PARENT */
	const struct option lock_options[] = {
	OPT_STRING('i', "input", &input_name, "file", "input file name"),
	OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
		   "file", "kallsyms pathname"),
	OPT_END()
	};

	const struct option info_options[] = {
	OPT_BOOLEAN('t', "threads", &info_threads,
		    "dump thread list in perf.data"),
	OPT_BOOLEAN('m', "map", &info_map,
		    "map of lock instances (address:name table)"),
	OPT_PARENT(lock_options)
	};

	const struct option report_options[] = {
	OPT_STRING('k', "key", &sort_key, "acquired",
		    "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
	OPT_STRING('F', "field", &output_fields, NULL,
		    "output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
	/* TODO: type */
	OPT_BOOLEAN('c', "combine-locks", &combine_locks,
		    "combine locks in the same class"),
	OPT_BOOLEAN('t', "threads", &show_thread_stats,
		    "show per-thread lock stats"),
	OPT_PARENT(lock_options)
	};

	/* non-const: set_option_nobuild() may patch the -b entry below */
	struct option contention_options[] = {
	OPT_STRING('k', "key", &sort_key, "wait_total",
		    "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"),
	OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait",
		    "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"),
	OPT_BOOLEAN('t', "threads", &show_thread_stats,
		    "show per-thread lock stats"),
	OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "System-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		    "List of cpus to monitor"),
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "Trace on existing process id"),
	OPT_STRING(0, "tid", &target.tid, "tid",
		   "Trace on existing thread id (exclusive to --pid)"),
	OPT_CALLBACK(0, "map-nr-entries", &bpf_map_entries, "num",
		     "Max number of BPF map entries", parse_map_entry),
	OPT_PARENT(lock_options)
	};

	const char * const info_usage[] = {
		"perf lock info [<options>]",
		NULL
	};
	const char *const lock_subcommands[] = { "record", "report", "script",
						 "info", "contention", NULL };
	const char *lock_usage[] = {
		NULL,
		NULL
	};
	const char * const report_usage[] = {
		"perf lock report [<options>]",
		NULL
	};
	const char * const contention_usage[] = {
		"perf lock contention [<options>]",
		NULL
	};
	unsigned int i;
	int rc = 0;

	for (i = 0; i < LOCKHASH_SIZE; i++)
		INIT_HLIST_HEAD(lockhash_table + i);

	argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
					lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc)
		usage_with_options(lock_usage, lock_options);

	/* strstarts(full, argv[0]) is deliberate: allows e.g. 'perf lock rec' */
	if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
		return __cmd_record(argc, argv);
	} else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) {
		trace_handler = &report_lock_ops;
		if (argc) {
			argc = parse_options(argc, argv,
					     report_options, report_usage, 0);
			if (argc)
				usage_with_options(report_usage, report_options);
		}
		rc = __cmd_report(false);
	} else if (!strcmp(argv[0], "script")) {
		/* Aliased to 'perf script' */
		return cmd_script(argc, argv);
	} else if (!strcmp(argv[0], "info")) {
		if (argc) {
			argc = parse_options(argc, argv,
					     info_options, info_usage, 0);
			if (argc)
				usage_with_options(info_usage, info_options);
		}
		/* recycling report_lock_ops */
		trace_handler = &report_lock_ops;
		rc = __cmd_report(true);
	} else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) {
		trace_handler = &contention_lock_ops;
		/* contention has different default sort key and fields */
		sort_key = "wait_total";
		output_fields = "contended,wait_total,wait_max,avg_wait";

#ifndef HAVE_BPF_SKEL
		set_option_nobuild(contention_options, 'b', "use-bpf",
				   "no BUILD_BPF_SKEL=1", false);
#endif
		if (argc) {
			argc = parse_options(argc, argv, contention_options,
					     contention_usage, 0);
		}
		rc = __cmd_contention(argc, argv);
	} else {
		usage_with_options(lock_usage, lock_options);
	}

	return rc;
}