/*
 * thread-stack.c: Synthesize a thread's call stack from call/return branch
 * samples.
 */

#include <linux/rbtree.h>
#include <linux/list.h>
#include <linux/log2.h>
#include <linux/zalloc.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include "thread.h"
#include "event.h"
#include "machine.h"
#include "env.h"
#include "debug.h"
#include "symbol.h"
#include "comm.h"
#include "call-path.h"
#include "thread-stack.h"

/* Number of entries by which the stack array grows when it is full */
#define STACK_GROWTH 2048

/*
 * State of x86 retpoline detection.
 *
 * @RETPOLINE_NONE: not x86, so no retpoline detection
 * @X86_RETPOLINE_POSSIBLE: x86, but no retpoline thunk detected yet
 * @X86_RETPOLINE_DETECTED: x86 retpoline thunk detected in the trace
 */
enum retpoline_state_t {
	RETPOLINE_NONE,
	X86_RETPOLINE_POSSIBLE,
	X86_RETPOLINE_DETECTED,
};

/**
 * struct thread_stack_entry - thread stack entry.
 * @ret_addr: return address
 * @timestamp: timestamp (if known)
 * @ref: external reference (e.g. db_id of sample)
 * @branch_count: the branch count when the entry was created
 * @insn_count: the instruction count when the entry was created
 * @cyc_count: the cycle count when the entry was created
 * @db_id: id used for db-export
 * @cp: call path
 * @no_call: a 'call' was not seen
 * @trace_end: a 'call' but trace ended
 * @non_call: a branch but not a 'call' to the start of a different symbol
 */
struct thread_stack_entry {
	u64 ret_addr;
	u64 timestamp;
	u64 ref;
	u64 branch_count;
	u64 insn_count;
	u64 cyc_count;
	u64 db_id;
	struct call_path *cp;
	bool no_call;
	bool trace_end;
	bool non_call;
};

/**
 * struct thread_stack - thread stack constructed from 'call' and 'return'
 *                       branch samples.
 * @stack: array that holds the stack
 * @cnt: number of entries in the stack
 * @sz: current maximum stack size
 * @trace_nr: current trace number
 * @branch_count: running branch count
 * @insn_count: running instruction count
 * @cyc_count: running cycle count
 * @kernel_start: kernel start address
 * @last_time: last timestamp
 * @crp: call/return processor
 * @comm: current comm
 * @arr_sz: size of array if this is the first element of an array
 * @rstate: used to detect retpolines
 * @br_stack_rb: branch stack (ring buffer)
 * @br_stack_sz: maximum branch stack size
 * @br_stack_pos: current position in @br_stack_rb
 * @mispred_all: mark all branches as mispredicted
 */
struct thread_stack {
	struct thread_stack_entry *stack;
	size_t cnt;
	size_t sz;
	u64 trace_nr;
	u64 branch_count;
	u64 insn_count;
	u64 cyc_count;
	u64 kernel_start;
	u64 last_time;
	struct call_return_processor *crp;
	struct comm *comm;
	unsigned int arr_sz;
	enum retpoline_state_t rstate;
	struct branch_stack *br_stack_rb;
	unsigned int br_stack_sz;
	unsigned int br_stack_pos;
	bool mispred_all;
};

/*
 * The idle task (tid 0, pid 0) runs separately on every CPU ("swapper"), so
 * it keeps one thread stack per CPU.
 */
static inline bool thread_stack__per_cpu(struct thread *thread)
{
	return !(thread->tid || thread->pid_);
}

static int thread_stack__grow(struct thread_stack *ts)
{
	struct thread_stack_entry *new_stack;
	size_t sz, new_sz;

	new_sz = ts->sz + STACK_GROWTH;
	sz = new_sz * sizeof(struct thread_stack_entry);

	new_stack = realloc(ts->stack, sz);
	if (!new_stack)
		return -ENOMEM;

	ts->stack = new_stack;
	ts->sz = new_sz;

	return 0;
}

static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
			      struct call_return_processor *crp,
			      bool callstack, unsigned int br_stack_sz)
{
	int err;

	if (callstack) {
		err = thread_stack__grow(ts);
		if (err)
			return err;
	}

	if (br_stack_sz) {
		size_t sz = sizeof(struct branch_stack);

		sz += br_stack_sz * sizeof(struct branch_entry);
		ts->br_stack_rb = zalloc(sz);
		if (!ts->br_stack_rb)
			return -ENOMEM;
		ts->br_stack_sz = br_stack_sz;
	}

	if (thread->maps && thread->maps->machine) {
		struct machine *machine = thread->maps->machine;
		const char *arch = perf_env__arch(machine->env);

		ts->kernel_start = machine__kernel_start(machine);
		if (!strcmp(arch, "x86"))
			ts->rstate = X86_RETPOLINE_POSSIBLE;
	} else {
		ts->kernel_start = 1ULL << 63;
	}
	ts->crp = crp;

	return 0;
}

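/*
 * Allocate (or, for a per-cpu stack, grow) the thread's array of stacks and
 * initialize the slot for @cpu. Returns the slot, or NULL on allocation
 * failure.
 */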
static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
					      struct call_return_processor *crp,
					      bool callstack,
					      unsigned int br_stack_sz)
{
	struct thread_stack *ts = thread->ts, *new_ts;
	unsigned int old_sz = ts ? ts->arr_sz : 0;
	unsigned int new_sz = 1;

	if (thread_stack__per_cpu(thread) && cpu > 0)
		new_sz = roundup_pow_of_two(cpu + 1);

	if (!ts || new_sz > old_sz) {
		new_ts = calloc(new_sz, sizeof(*ts));
		if (!new_ts)
			return NULL;
		if (ts)
			memcpy(new_ts, ts, old_sz * sizeof(*ts));
		new_ts->arr_sz = new_sz;
		zfree(&thread->ts);
		thread->ts = new_ts;
		ts = new_ts;
	}

	if (thread_stack__per_cpu(thread) && cpu > 0 &&
	    (unsigned int)cpu < ts->arr_sz)
		ts += cpu;

	if (!ts->stack &&
	    thread_stack__init(ts, thread, crp, callstack, br_stack_sz))
		return NULL;

	return ts;
}

static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
{
	struct thread_stack *ts = thread->ts;

	if (cpu < 0)
		cpu = 0;

	if (!ts || (unsigned int)cpu >= ts->arr_sz)
		return NULL;

	ts += cpu;

	if (!ts->stack)
		return NULL;

	return ts;
}

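/*
 * Look up the thread stack to use: the per-cpu slot for the idle task,
 * otherwise the first (only) entry.
 */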
static inline struct thread_stack *thread__stack(struct thread *thread,
						 int cpu)
{
	if (!thread)
		return NULL;

	if (thread_stack__per_cpu(thread))
		return thread__cpu_stack(thread, cpu);

	return thread->ts;
}

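/*
 * Push a return address. If the stack cannot be grown, the existing entries
 * are discarded but the new entry is still recorded.
 */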
static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
			      bool trace_end)
{
	int err = 0;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err) {
			pr_warning("Out of memory: discarding thread stack\n");
			ts->cnt = 0;
		}
	}

	ts->stack[ts->cnt].trace_end = trace_end;
	ts->stack[ts->cnt++].ret_addr = ret_addr;

	return err;
}

static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
{
	size_t i;

	/*
	 * Some functions are never seen to return, for example when
	 * setjmp / longjmp is used, so the matching return address may be
	 * further down the stack. Search downwards and pop everything above
	 * the match. If no entry matches, leave the stack unchanged.
	 */
	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].ret_addr == ret_addr) {
			ts->cnt = i;
			return;
		}
	}
}

static void thread_stack__pop_trace_end(struct thread_stack *ts)
{
	size_t i;

	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].trace_end)
			ts->cnt = i;
		else
			return;
	}
}

static bool thread_stack__in_kernel(struct thread_stack *ts)
{
	if (!ts->cnt)
		return false;

	return ts->stack[ts->cnt - 1].cp->in_kernel;
}

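/*
 * Synthesize a call/return pair for stack entry @idx and hand it to the
 * call/return processor. @no_return marks entries that are being popped
 * without having seen a return.
 */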
static int thread_stack__call_return(struct thread *thread,
				     struct thread_stack *ts, size_t idx,
				     u64 timestamp, u64 ref, bool no_return)
{
	struct call_return_processor *crp = ts->crp;
	struct thread_stack_entry *tse;
	struct call_return cr = {
		.thread = thread,
		.comm = ts->comm,
		.db_id = 0,
	};
	u64 *parent_db_id;

	tse = &ts->stack[idx];
	cr.cp = tse->cp;
	cr.call_time = tse->timestamp;
	cr.return_time = timestamp;
	cr.branch_count = ts->branch_count - tse->branch_count;
	cr.insn_count = ts->insn_count - tse->insn_count;
	cr.cyc_count = ts->cyc_count - tse->cyc_count;
	cr.db_id = tse->db_id;
	cr.call_ref = tse->ref;
	cr.return_ref = ref;
	if (tse->no_call)
		cr.flags |= CALL_RETURN_NO_CALL;
	if (no_return)
		cr.flags |= CALL_RETURN_NO_RETURN;
	if (tse->non_call)
		cr.flags |= CALL_RETURN_NON_CALL;

	/*
	 * The parent entry's db_id is passed so that it can be assigned
	 * before exporting the child. The parent cannot be exported first
	 * because its information is not complete until its own 'return'
	 * has been processed.
	 */
	parent_db_id = idx ? &(tse - 1)->db_id : NULL;

	return crp->process(&cr, parent_db_id, crp->data);
}

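/*
 * Flush one thread stack: without a call/return processor just empty it;
 * otherwise report every remaining entry as a call without a return.
 */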
static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
{
	struct call_return_processor *crp = ts->crp;
	int err;

	if (!crp) {
		ts->cnt = 0;
		ts->br_stack_pos = 0;
		if (ts->br_stack_rb)
			ts->br_stack_rb->nr = 0;
		return 0;
	}

	while (ts->cnt) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						ts->last_time, 0, true);
		if (err) {
			pr_err("Error flushing thread stack!\n");
			ts->cnt = 0;
			return err;
		}
	}

	return 0;
}

int thread_stack__flush(struct thread *thread)
{
	struct thread_stack *ts = thread->ts;
	unsigned int pos;
	int err = 0;

	if (ts) {
		for (pos = 0; pos < ts->arr_sz; pos++) {
			int ret = __thread_stack__flush(thread, ts + pos);

			if (ret)
				err = ret;
		}
	}

	return err;
}

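/*
 * Record one branch in the ring buffer. Entries are written backwards from
 * the end of the buffer, so the most recent branch is at @br_stack_pos.
 */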
static void thread_stack__update_br_stack(struct thread_stack *ts, u32 flags,
					  u64 from_ip, u64 to_ip)
{
	struct branch_stack *bs = ts->br_stack_rb;
	struct branch_entry *be;

	if (!ts->br_stack_pos)
		ts->br_stack_pos = ts->br_stack_sz;

	ts->br_stack_pos -= 1;

	be = &bs->entries[ts->br_stack_pos];
	be->from = from_ip;
	be->to = to_ip;
	be->flags.value = 0;
	be->flags.abort = !!(flags & PERF_IP_FLAG_TX_ABORT);
	be->flags.in_tx = !!(flags & PERF_IP_FLAG_IN_TX);
	/* No per-branch mispredict information is available */
	be->flags.mispred = ts->mispred_all;

	if (bs->nr < ts->br_stack_sz)
		bs->nr += 1;
}

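/*
 * Handle one branch event: update the branch stack ring buffer if requested,
 * and maintain the call stack from the call/return flags unless
 * thread_stack__process() is in use (ts->crp is set).
 */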
int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
			u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack,
			unsigned int br_stack_sz, bool mispred_all)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!thread)
		return -EINVAL;

	if (!ts) {
		ts = thread_stack__new(thread, cpu, NULL, callstack, br_stack_sz);
		if (!ts) {
			pr_warning("Out of memory: no thread stack\n");
			return -ENOMEM;
		}
		ts->trace_nr = trace_nr;
		ts->mispred_all = mispred_all;
	}

	/*
	 * When the trace is discontinuous, the trace_nr changes. In that
	 * case the stack might be completely incorrect, so it is flushed.
	 */
	if (trace_nr != ts->trace_nr) {
		if (ts->trace_nr)
			__thread_stack__flush(thread, ts);
		ts->trace_nr = trace_nr;
	}

	if (br_stack_sz)
		thread_stack__update_br_stack(ts, flags, from_ip, to_ip);

	/*
	 * Stop here if thread_stack__process() is in use, or the call stack
	 * is not being kept.
	 */
	if (ts->crp || !callstack)
		return 0;

	if (flags & PERF_IP_FLAG_CALL) {
		u64 ret_addr;

		if (!to_ip)
			return 0;
		ret_addr = from_ip + insn_len;
		if (ret_addr == to_ip)
			return 0; /* Zero-length calls are excluded */
		return thread_stack__push(ts, ret_addr,
					  flags & PERF_IP_FLAG_TRACE_END);
	} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
		/*
		 * Tracing began after returning to the current address, so
		 * try to pop that. Also, do not expect a call made when the
		 * trace ended, to return, so pop that.
		 */
		thread_stack__pop(ts, to_ip);
		thread_stack__pop_trace_end(ts);
	} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
		thread_stack__pop(ts, to_ip);
	}

	return 0;
}

void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!ts)
		return;

	if (trace_nr != ts->trace_nr) {
		if (ts->trace_nr)
			__thread_stack__flush(thread, ts);
		ts->trace_nr = trace_nr;
	}
}

static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
{
	__thread_stack__flush(thread, ts);
	zfree(&ts->stack);
	zfree(&ts->br_stack_rb);
}

static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
{
	unsigned int arr_sz = ts->arr_sz;

	__thread_stack__free(thread, ts);
	memset(ts, 0, sizeof(*ts));
	ts->arr_sz = arr_sz;
}

void thread_stack__free(struct thread *thread)
{
	struct thread_stack *ts = thread->ts;
	unsigned int pos;

	if (ts) {
		for (pos = 0; pos < ts->arr_sz; pos++)
			__thread_stack__free(thread, ts + pos);
		zfree(&thread->ts);
	}
}

static inline u64 callchain_context(u64 ip, u64 kernel_start)
{
	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
}

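/*
 * Fill @chain with the sampled ip followed by the return addresses currently
 * on the thread stack, inserting PERF_CONTEXT_* markers whenever the chain
 * crosses between user and kernel space.
 */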
void thread_stack__sample(struct thread *thread, int cpu,
			  struct ip_callchain *chain,
			  size_t sz, u64 ip, u64 kernel_start)
{
	struct thread_stack *ts = thread__stack(thread, cpu);
	u64 context = callchain_context(ip, kernel_start);
	u64 last_context;
	size_t i, j;

	if (sz < 2) {
		chain->nr = 0;
		return;
	}

	chain->ips[0] = context;
	chain->ips[1] = ip;

	if (!ts) {
		chain->nr = 2;
		return;
	}

	last_context = context;

	for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
		ip = ts->stack[ts->cnt - j].ret_addr;
		context = callchain_context(ip, kernel_start);
		if (context != last_context) {
			if (i >= sz - 1)
				break;
			chain->ips[i++] = context;
			last_context = context;
		}
		chain->ips[i] = ip;
	}

	chain->nr = i;
}

/*
 * Hardware sample records are created some time after the event occurred,
 * so addresses pushed after the sample must be removed from the call chain.
 */
void thread_stack__sample_late(struct thread *thread, int cpu,
			       struct ip_callchain *chain, size_t sz,
			       u64 sample_ip, u64 kernel_start)
{
	struct thread_stack *ts = thread__stack(thread, cpu);
	u64 sample_context = callchain_context(sample_ip, kernel_start);
	u64 last_context, context, ip;
	size_t nr = 0, j;

	if (sz < 2) {
		chain->nr = 0;
		return;
	}

	if (!ts)
		goto out;

	/*
	 * Skip stack entries pushed after the sample was taken: stop at the
	 * first user space address, or at a kernel address that matches the
	 * sampled ip.
	 */
	for (j = 1; j <= ts->cnt; j++) {
		ip = ts->stack[ts->cnt - j].ret_addr;
		context = callchain_context(ip, kernel_start);
		if (context == PERF_CONTEXT_USER ||
		    (context == sample_context && ip == sample_ip))
			break;
	}

	last_context = sample_ip; /* Use sample_ip as an invalid context */

	for (; nr < sz && j <= ts->cnt; nr++, j++) {
		ip = ts->stack[ts->cnt - j].ret_addr;
		context = callchain_context(ip, kernel_start);
		if (context != last_context) {
			if (nr >= sz - 1)
				break;
			chain->ips[nr++] = context;
			last_context = context;
		}
		chain->ips[nr] = ip;
	}
out:
	if (nr) {
		chain->nr = nr;
	} else {
		chain->ips[0] = sample_context;
		chain->ips[1] = sample_ip;
		chain->nr = 2;
	}
}

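/*
 * Copy the branch stack ring buffer into @dst in most-recent-first order:
 * first the entries from @br_stack_pos to the end of the buffer, then, if
 * the buffer has wrapped, the entries from the start of the buffer.
 */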
void thread_stack__br_sample(struct thread *thread, int cpu,
			     struct branch_stack *dst, unsigned int sz)
{
	struct thread_stack *ts = thread__stack(thread, cpu);
	const size_t bsz = sizeof(struct branch_entry);
	struct branch_stack *src;
	struct branch_entry *be;
	unsigned int nr;

	dst->nr = 0;

	if (!ts)
		return;

	src = ts->br_stack_rb;
	if (!src->nr)
		return;

	dst->nr = min((unsigned int)src->nr, sz);

	be = &dst->entries[0];
	nr = min(ts->br_stack_sz - ts->br_stack_pos, (unsigned int)dst->nr);
	memcpy(be, &src->entries[ts->br_stack_pos], bsz * nr);

	if (src->nr >= ts->br_stack_sz) {
		sz -= nr;
		be = &dst->entries[nr];
		nr = min(ts->br_stack_pos, sz);
		memcpy(be, &src->entries[0], bsz * nr);
	}
}

/* Start of branch entries relevant to a sample taken in user space */
static bool us_start(struct branch_entry *be, u64 kernel_start, bool *start)
{
	if (!*start)
		*start = be->to && be->to < kernel_start;

	return *start;
}

/*
 * Start of branch entries relevant to a sample taken in kernel space: the
 * branch that led to the sampled ip (i.e. the sampled ip lies between this
 * branch's target and the next branch's source), or any branch to or from
 * user space.
 */
static bool ks_start(struct branch_entry *be, u64 sample_ip, u64 kernel_start,
		     bool *start, struct branch_entry *nb)
{
	if (!*start) {
		*start = (nb && sample_ip >= be->to && sample_ip <= nb->from) ||
			 be->from < kernel_start ||
			 (be->to && be->to < kernel_start);
	}

	return *start;
}

/*
 * Hardware sample records are created some time after the event occurred,
 * so branches recorded after the sample must be removed from the branch
 * stack.
 */
void thread_stack__br_sample_late(struct thread *thread, int cpu,
				  struct branch_stack *dst, unsigned int sz,
				  u64 ip, u64 kernel_start)
{
	struct thread_stack *ts = thread__stack(thread, cpu);
	struct branch_entry *d, *s, *spos, *ssz;
	struct branch_stack *src;
	unsigned int nr = 0;
	bool start = false;

	dst->nr = 0;

	if (!ts)
		return;

	src = ts->br_stack_rb;
	if (!src->nr)
		return;

	spos = &src->entries[ts->br_stack_pos];
	ssz = &src->entries[ts->br_stack_sz];

	d = &dst->entries[0];
	s = spos;

	if (ip < kernel_start) {
		/*
		 * User space sample: start copying branch entries only once
		 * the first branch to user space is seen.
		 */
		for (s = spos; s < ssz && nr < sz; s++) {
			if (us_start(s, kernel_start, &start)) {
				*d++ = *s;
				nr += 1;
			}
		}

		if (src->nr >= ts->br_stack_sz) {
			for (s = &src->entries[0]; s < spos && nr < sz; s++) {
				if (us_start(s, kernel_start, &start)) {
					*d++ = *s;
					nr += 1;
				}
			}
		}
	} else {
		struct branch_entry *nb = NULL;

		/*
		 * Kernel space sample: start copying branch entries at the
		 * branch that led to the sampled ip, or at a branch into or
		 * out of user space.
		 */
		for (s = spos; s < ssz && nr < sz; s++) {
			if (ks_start(s, ip, kernel_start, &start, nb)) {
				*d++ = *s;
				nr += 1;
			}
			nb = s;
		}

		if (src->nr >= ts->br_stack_sz) {
			for (s = &src->entries[0]; s < spos && nr < sz; s++) {
				if (ks_start(s, ip, kernel_start, &start, nb)) {
					*d++ = *s;
					nr += 1;
				}
				nb = s;
			}
		}
	}

	dst->nr = nr;
}

struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data),
			   void *data)
{
	struct call_return_processor *crp;

	crp = zalloc(sizeof(struct call_return_processor));
	if (!crp)
		return NULL;
	crp->cpr = call_path_root__new();
	if (!crp->cpr)
		goto out_free;
	crp->process = process;
	crp->data = data;
	return crp;

out_free:
	free(crp);
	return NULL;
}

void call_return_processor__free(struct call_return_processor *crp)
{
	if (crp) {
		call_path_root__free(crp->cpr);
		free(crp);
	}
}

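/*
 * Push a stack entry that also records the call path, timestamp and counts,
 * for use with the call/return processor.
 */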
static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
				 u64 timestamp, u64 ref, struct call_path *cp,
				 bool no_call, bool trace_end)
{
	struct thread_stack_entry *tse;
	int err;

	if (!cp)
		return -ENOMEM;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err)
			return err;
	}

	tse = &ts->stack[ts->cnt++];
	tse->ret_addr = ret_addr;
	tse->timestamp = timestamp;
	tse->ref = ref;
	tse->branch_count = ts->branch_count;
	tse->insn_count = ts->insn_count;
	tse->cyc_count = ts->cyc_count;
	tse->cp = cp;
	tse->no_call = no_call;
	tse->trace_end = trace_end;
	tse->non_call = false;
	tse->db_id = 0;

	return 0;
}

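/*
 * Pop the stack back to the entry matching @ret_addr (or, for a stack of
 * depth 1, matching @sym), reporting a call/return for every entry popped.
 * Returns 1 if no matching entry is found.
 */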
static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
				u64 ret_addr, u64 timestamp, u64 ref,
				struct symbol *sym)
{
	int err;

	if (!ts->cnt)
		return 1;

	if (ts->cnt == 1) {
		struct thread_stack_entry *tse = &ts->stack[0];

		if (tse->cp->sym == sym)
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
	}

	if (ts->stack[ts->cnt - 1].ret_addr == ret_addr &&
	    !ts->stack[ts->cnt - 1].non_call) {
		return thread_stack__call_return(thread, ts, --ts->cnt,
						 timestamp, ref, false);
	} else {
		size_t i = ts->cnt - 1;

		while (i--) {
			if (ts->stack[i].ret_addr != ret_addr ||
			    ts->stack[i].non_call)
				continue;
			i += 1;
			while (ts->cnt > i) {
				err = thread_stack__call_return(thread, ts,
								--ts->cnt,
								timestamp, ref,
								true);
				if (err)
					return err;
			}
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
		}
	}

	return 1;
}

static int thread_stack__bottom(struct thread_stack *ts,
				struct perf_sample *sample,
				struct addr_location *from_al,
				struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	struct symbol *sym;
	u64 ip;

	if (sample->ip) {
		ip = sample->ip;
		sym = from_al->sym;
	} else if (sample->addr) {
		ip = sample->addr;
		sym = to_al->sym;
	} else {
		return 0;
	}

	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
				ts->kernel_start);

	return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
				     true, false);
}

static int thread_stack__pop_ks(struct thread *thread, struct thread_stack *ts,
				struct perf_sample *sample, u64 ref)
{
	u64 tm = sample->time;
	int err;

	/* Pop all kernel addresses from the stack */
	while (thread_stack__in_kernel(ts)) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						tm, ref, true);
		if (err)
			return err;
	}

	return 0;
}

static int thread_stack__no_call_return(struct thread *thread,
					struct thread_stack *ts,
					struct perf_sample *sample,
					struct addr_location *from_al,
					struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *root = &cpr->call_path;
	struct symbol *fsym = from_al->sym;
	struct symbol *tsym = to_al->sym;
	struct call_path *cp, *parent;
	u64 ks = ts->kernel_start;
	u64 addr = sample->addr;
	u64 tm = sample->time;
	u64 ip = sample->ip;
	int err;

	if (ip >= ks && addr < ks) {
		/* Return to userspace, so pop all kernel addresses */
		err = thread_stack__pop_ks(thread, ts, sample, ref);
		if (err)
			return err;

		/* If the stack is empty, push the userspace address */
		if (!ts->cnt) {
			cp = call_path__findnew(cpr, root, tsym, addr, ks);
			return thread_stack__push_cp(ts, 0, tm, ref, cp, true,
						     false);
		}
	} else if (thread_stack__in_kernel(ts) && ip < ks) {
		/* Return to userspace, so pop all kernel addresses */
		err = thread_stack__pop_ks(thread, ts, sample, ref);
		if (err)
			return err;
	}

	if (ts->cnt)
		parent = ts->stack[ts->cnt - 1].cp;
	else
		parent = root;

	if (parent->sym == from_al->sym) {
		/*
		 * At the bottom of the stack, assume the missing 'call' was
		 * before the trace started, so pop the current symbol and
		 * push the 'to' symbol instead.
		 */
		if (ts->cnt == 1) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							tm, ref, false);
			if (err)
				return err;
		}

		if (!ts->cnt) {
			cp = call_path__findnew(cpr, root, tsym, addr, ks);

			return thread_stack__push_cp(ts, addr, tm, ref, cp,
						     true, false);
		}

		/*
		 * Otherwise assume the 'return' is being used as a jump and
		 * just push the 'to' symbol as a non-call entry.
		 */
		cp = call_path__findnew(cpr, parent, tsym, addr, ks);

		err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false);
		if (!err)
			ts->stack[ts->cnt - 1].non_call = true;

		return err;
	}

	/*
	 * Assume 'parent' has not yet returned, so push 'to', then push and
	 * pop 'from'.
	 */
	cp = call_path__findnew(cpr, parent, tsym, addr, ks);

	err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false);
	if (err)
		return err;

	cp = call_path__findnew(cpr, cp, fsym, ip, ks);

	err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false);
	if (err)
		return err;

	return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false);
}

static int thread_stack__trace_begin(struct thread *thread,
				     struct thread_stack *ts, u64 timestamp,
				     u64 ref)
{
	struct thread_stack_entry *tse;
	int err;

	if (!ts->cnt)
		return 0;

	/* Pop trace end */
	tse = &ts->stack[ts->cnt - 1];
	if (tse->trace_end) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						timestamp, ref, false);
		if (err)
			return err;
	}

	return 0;
}

static int thread_stack__trace_end(struct thread_stack *ts,
				   struct perf_sample *sample, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	u64 ret_addr;

	/* No point having 'trace end' on the bottom of the stack */
	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
		return 0;

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
				ts->kernel_start);

	ret_addr = sample->ip + sample->insn_len;

	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
				     false, true);
}

static bool is_x86_retpoline(const char *name)
{
	const char *p = strstr(name, "__x86_indirect_thunk_");

	return p == name || !strcmp(name, "__indirect_thunk_start");
}

/*
 * x86 retpoline functions pollute the call graph because the thunk's RET
 * instruction actually performs the indirect call. Remove retpoline thunk
 * entries from the stack and fix up the caller's call path.
 */
static int thread_stack__x86_retpoline(struct thread_stack *ts,
				       struct perf_sample *sample,
				       struct addr_location *to_al)
{
	struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1];
	struct call_path_root *cpr = ts->crp->cpr;
	struct symbol *sym = tse->cp->sym;
	struct symbol *tsym = to_al->sym;
	struct call_path *cp;

	if (sym && is_x86_retpoline(sym->name)) {
		/*
		 * The top of the stack is a retpoline thunk, whose RET
		 * instruction really performs the indirect call, so remove
		 * the thunk's entry from the stack.
		 */
		ts->cnt -= 1;
		sym = ts->stack[ts->cnt - 2].cp->sym;
		if (sym && sym == tsym && to_al->addr != tsym->start) {
			/*
			 * Target is back to the middle of the symbol we came
			 * from, so assume it is an indirect jmp and pop one
			 * more entry.
			 */
			ts->cnt -= 1;
			return 0;
		}
	} else if (sym && sym == tsym) {
		/*
		 * Target is back to the symbol we came from, so assume it is
		 * an indirect jmp and pop the entry.
		 */
		ts->cnt -= 1;
		return 0;
	}

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym,
				sample->addr, ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	/* Make the caller appear to call the 'return' target directly */
	ts->stack[ts->cnt - 1].cp = cp;

	return 0;
}

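/*
 * Process one branch sample with a call/return processor: maintain the call
 * stack and call paths, and report call/return pairs via @crp.
 */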
int thread_stack__process(struct thread *thread, struct comm *comm,
			  struct perf_sample *sample,
			  struct addr_location *from_al,
			  struct addr_location *to_al, u64 ref,
			  struct call_return_processor *crp)
{
	struct thread_stack *ts = thread__stack(thread, sample->cpu);
	enum retpoline_state_t rstate;
	int err = 0;

	if (ts && !ts->crp) {
		/* Supersede a stack created by thread_stack__event() */
		thread_stack__reset(thread, ts);
		ts = NULL;
	}

	if (!ts) {
		ts = thread_stack__new(thread, sample->cpu, crp, true, 0);
		if (!ts)
			return -ENOMEM;
		ts->comm = comm;
	}

	rstate = ts->rstate;
	if (rstate == X86_RETPOLINE_DETECTED)
		ts->rstate = X86_RETPOLINE_POSSIBLE;

	/* Flush stack on exec */
	if (ts->comm != comm && thread->pid_ == thread->tid) {
		err = __thread_stack__flush(thread, ts);
		if (err)
			return err;
		ts->comm = comm;
	}

	/* If the stack is empty, put the current symbol on the stack */
	if (!ts->cnt) {
		err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
		if (err)
			return err;
	}

	ts->branch_count += 1;
	ts->insn_count += sample->insn_cnt;
	ts->cyc_count += sample->cyc_cnt;
	ts->last_time = sample->time;

	if (sample->flags & PERF_IP_FLAG_CALL) {
		bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;
		u64 ret_addr;

		if (!sample->ip || !sample->addr)
			return 0;

		ret_addr = sample->ip + sample->insn_len;
		if (ret_addr == sample->addr)
			return 0; /* Zero-length calls are excluded */

		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
					    cp, false, trace_end);

		/*
		 * A call to the same symbol but not the start of the symbol
		 * may be the start of a x86 retpoline.
		 */
		if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym &&
		    from_al->sym == to_al->sym &&
		    to_al->addr != to_al->sym->start)
			ts->rstate = X86_RETPOLINE_DETECTED;

	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
		if (!sample->addr) {
			u32 return_from_kernel = PERF_IP_FLAG_SYSCALLRET |
						 PERF_IP_FLAG_INTERRUPT;

			if (!(sample->flags & return_from_kernel))
				return 0;

			/* Return from the kernel: pop kernel addresses */
			return thread_stack__pop_ks(thread, ts, sample, ref);
		}

		if (!sample->ip)
			return 0;

		/* x86 retpoline 'return' doesn't match the stack */
		if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 &&
		    ts->stack[ts->cnt - 1].ret_addr != sample->addr)
			return thread_stack__x86_retpoline(ts, sample, to_al);

		err = thread_stack__pop_cp(thread, ts, sample->addr,
					   sample->time, ref, from_al->sym);
		if (err) {
			if (err < 0)
				return err;
			err = thread_stack__no_call_return(thread, ts, sample,
							   from_al, to_al, ref);
		}
	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
		err = thread_stack__trace_end(ts, sample, ref);
	} else if (sample->flags & PERF_IP_FLAG_BRANCH &&
		   from_al->sym != to_al->sym && to_al->sym &&
		   to_al->addr == to_al->sym->start) {
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;

		/*
		 * The compiler might optimize a call/ret combination by
		 * making it a jmp. Make that visible by recording on the
		 * stack a branch to the start of a different symbol. Note,
		 * that means when a ret pops the stack, all jmps must be
		 * popped off first.
		 */
		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false,
					    false);
		if (!err)
			ts->stack[ts->cnt - 1].non_call = true;
	}

	return err;
}

size_t thread_stack__depth(struct thread *thread, int cpu)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!ts)
		return 0;
	return ts->cnt;
}