Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * thread-stack.c: Synthesize a thread's stack using call / return events
0004  * Copyright (c) 2014, Intel Corporation.
0005  */
0006 
0007 #include <linux/rbtree.h>
0008 #include <linux/list.h>
0009 #include <linux/log2.h>
0010 #include <linux/zalloc.h>
0011 #include <errno.h>
0012 #include <stdlib.h>
0013 #include <string.h>
0014 #include "thread.h"
0015 #include "event.h"
0016 #include "machine.h"
0017 #include "env.h"
0018 #include "debug.h"
0019 #include "symbol.h"
0020 #include "comm.h"
0021 #include "call-path.h"
0022 #include "thread-stack.h"
0023 
0024 #define STACK_GROWTH 2048
0025 
0026 /*
0027  * State of retpoline detection.
0028  *
0029  * RETPOLINE_NONE: no retpoline detection
0030  * X86_RETPOLINE_POSSIBLE: x86 retpoline possible
0031  * X86_RETPOLINE_DETECTED: x86 retpoline detected
0032  */
0033 enum retpoline_state_t {
0034     RETPOLINE_NONE,
0035     X86_RETPOLINE_POSSIBLE,
0036     X86_RETPOLINE_DETECTED,
0037 };
0038 
0039 /**
0040  * struct thread_stack_entry - thread stack entry.
0041  * @ret_addr: return address
0042  * @timestamp: timestamp (if known)
0043  * @ref: external reference (e.g. db_id of sample)
0044  * @branch_count: the branch count when the entry was created
0045  * @insn_count: the instruction count when the entry was created
0046  * @cyc_count the cycle count when the entry was created
0047  * @db_id: id used for db-export
0048  * @cp: call path
0049  * @no_call: a 'call' was not seen
0050  * @trace_end: a 'call' but trace ended
0051  * @non_call: a branch but not a 'call' to the start of a different symbol
0052  */
0053 struct thread_stack_entry {
0054     u64 ret_addr;
0055     u64 timestamp;
0056     u64 ref;
0057     u64 branch_count;
0058     u64 insn_count;
0059     u64 cyc_count;
0060     u64 db_id;
0061     struct call_path *cp;
0062     bool no_call;
0063     bool trace_end;
0064     bool non_call;
0065 };
0066 
0067 /**
0068  * struct thread_stack - thread stack constructed from 'call' and 'return'
0069  *                       branch samples.
0070  * @stack: array that holds the stack
0071  * @cnt: number of entries in the stack
0072  * @sz: current maximum stack size
0073  * @trace_nr: current trace number
0074  * @branch_count: running branch count
0075  * @insn_count: running  instruction count
0076  * @cyc_count running  cycle count
0077  * @kernel_start: kernel start address
0078  * @last_time: last timestamp
0079  * @crp: call/return processor
0080  * @comm: current comm
0081  * @arr_sz: size of array if this is the first element of an array
0082  * @rstate: used to detect retpolines
0083  * @br_stack_rb: branch stack (ring buffer)
0084  * @br_stack_sz: maximum branch stack size
0085  * @br_stack_pos: current position in @br_stack_rb
0086  * @mispred_all: mark all branches as mispredicted
0087  */
0088 struct thread_stack {
0089     struct thread_stack_entry *stack;
0090     size_t cnt;
0091     size_t sz;
0092     u64 trace_nr;
0093     u64 branch_count;
0094     u64 insn_count;
0095     u64 cyc_count;
0096     u64 kernel_start;
0097     u64 last_time;
0098     struct call_return_processor *crp;
0099     struct comm *comm;
0100     unsigned int arr_sz;
0101     enum retpoline_state_t rstate;
0102     struct branch_stack *br_stack_rb;
0103     unsigned int br_stack_sz;
0104     unsigned int br_stack_pos;
0105     bool mispred_all;
0106 };
0107 
0108 /*
0109  * Assume pid == tid == 0 identifies the idle task as defined by
0110  * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
0111  * and therefore requires a stack for each cpu.
0112  */
0113 static inline bool thread_stack__per_cpu(struct thread *thread)
0114 {
0115     return !(thread->tid || thread->pid_);
0116 }
0117 
0118 static int thread_stack__grow(struct thread_stack *ts)
0119 {
0120     struct thread_stack_entry *new_stack;
0121     size_t sz, new_sz;
0122 
0123     new_sz = ts->sz + STACK_GROWTH;
0124     sz = new_sz * sizeof(struct thread_stack_entry);
0125 
0126     new_stack = realloc(ts->stack, sz);
0127     if (!new_stack)
0128         return -ENOMEM;
0129 
0130     ts->stack = new_stack;
0131     ts->sz = new_sz;
0132 
0133     return 0;
0134 }
0135 
0136 static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
0137                   struct call_return_processor *crp,
0138                   bool callstack, unsigned int br_stack_sz)
0139 {
0140     int err;
0141 
0142     if (callstack) {
0143         err = thread_stack__grow(ts);
0144         if (err)
0145             return err;
0146     }
0147 
0148     if (br_stack_sz) {
0149         size_t sz = sizeof(struct branch_stack);
0150 
0151         sz += br_stack_sz * sizeof(struct branch_entry);
0152         ts->br_stack_rb = zalloc(sz);
0153         if (!ts->br_stack_rb)
0154             return -ENOMEM;
0155         ts->br_stack_sz = br_stack_sz;
0156     }
0157 
0158     if (thread->maps && thread->maps->machine) {
0159         struct machine *machine = thread->maps->machine;
0160         const char *arch = perf_env__arch(machine->env);
0161 
0162         ts->kernel_start = machine__kernel_start(machine);
0163         if (!strcmp(arch, "x86"))
0164             ts->rstate = X86_RETPOLINE_POSSIBLE;
0165     } else {
0166         ts->kernel_start = 1ULL << 63;
0167     }
0168     ts->crp = crp;
0169 
0170     return 0;
0171 }
0172 
0173 static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
0174                           struct call_return_processor *crp,
0175                           bool callstack,
0176                           unsigned int br_stack_sz)
0177 {
0178     struct thread_stack *ts = thread->ts, *new_ts;
0179     unsigned int old_sz = ts ? ts->arr_sz : 0;
0180     unsigned int new_sz = 1;
0181 
0182     if (thread_stack__per_cpu(thread) && cpu > 0)
0183         new_sz = roundup_pow_of_two(cpu + 1);
0184 
0185     if (!ts || new_sz > old_sz) {
0186         new_ts = calloc(new_sz, sizeof(*ts));
0187         if (!new_ts)
0188             return NULL;
0189         if (ts)
0190             memcpy(new_ts, ts, old_sz * sizeof(*ts));
0191         new_ts->arr_sz = new_sz;
0192         zfree(&thread->ts);
0193         thread->ts = new_ts;
0194         ts = new_ts;
0195     }
0196 
0197     if (thread_stack__per_cpu(thread) && cpu > 0 &&
0198         (unsigned int)cpu < ts->arr_sz)
0199         ts += cpu;
0200 
0201     if (!ts->stack &&
0202         thread_stack__init(ts, thread, crp, callstack, br_stack_sz))
0203         return NULL;
0204 
0205     return ts;
0206 }
0207 
0208 static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
0209 {
0210     struct thread_stack *ts = thread->ts;
0211 
0212     if (cpu < 0)
0213         cpu = 0;
0214 
0215     if (!ts || (unsigned int)cpu >= ts->arr_sz)
0216         return NULL;
0217 
0218     ts += cpu;
0219 
0220     if (!ts->stack)
0221         return NULL;
0222 
0223     return ts;
0224 }
0225 
0226 static inline struct thread_stack *thread__stack(struct thread *thread,
0227                             int cpu)
0228 {
0229     if (!thread)
0230         return NULL;
0231 
0232     if (thread_stack__per_cpu(thread))
0233         return thread__cpu_stack(thread, cpu);
0234 
0235     return thread->ts;
0236 }
0237 
0238 static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
0239                   bool trace_end)
0240 {
0241     int err = 0;
0242 
0243     if (ts->cnt == ts->sz) {
0244         err = thread_stack__grow(ts);
0245         if (err) {
0246             pr_warning("Out of memory: discarding thread stack\n");
0247             ts->cnt = 0;
0248         }
0249     }
0250 
0251     ts->stack[ts->cnt].trace_end = trace_end;
0252     ts->stack[ts->cnt++].ret_addr = ret_addr;
0253 
0254     return err;
0255 }
0256 
0257 static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
0258 {
0259     size_t i;
0260 
0261     /*
0262      * In some cases there may be functions which are not seen to return.
0263      * For example when setjmp / longjmp has been used.  Or the perf context
0264      * switch in the kernel which doesn't stop and start tracing in exactly
0265      * the same code path.  When that happens the return address will be
0266      * further down the stack.  If the return address is not found at all,
0267      * we assume the opposite (i.e. this is a return for a call that wasn't
0268      * seen for some reason) and leave the stack alone.
0269      */
0270     for (i = ts->cnt; i; ) {
0271         if (ts->stack[--i].ret_addr == ret_addr) {
0272             ts->cnt = i;
0273             return;
0274         }
0275     }
0276 }
0277 
0278 static void thread_stack__pop_trace_end(struct thread_stack *ts)
0279 {
0280     size_t i;
0281 
0282     for (i = ts->cnt; i; ) {
0283         if (ts->stack[--i].trace_end)
0284             ts->cnt = i;
0285         else
0286             return;
0287     }
0288 }
0289 
0290 static bool thread_stack__in_kernel(struct thread_stack *ts)
0291 {
0292     if (!ts->cnt)
0293         return false;
0294 
0295     return ts->stack[ts->cnt - 1].cp->in_kernel;
0296 }
0297 
0298 static int thread_stack__call_return(struct thread *thread,
0299                      struct thread_stack *ts, size_t idx,
0300                      u64 timestamp, u64 ref, bool no_return)
0301 {
0302     struct call_return_processor *crp = ts->crp;
0303     struct thread_stack_entry *tse;
0304     struct call_return cr = {
0305         .thread = thread,
0306         .comm = ts->comm,
0307         .db_id = 0,
0308     };
0309     u64 *parent_db_id;
0310 
0311     tse = &ts->stack[idx];
0312     cr.cp = tse->cp;
0313     cr.call_time = tse->timestamp;
0314     cr.return_time = timestamp;
0315     cr.branch_count = ts->branch_count - tse->branch_count;
0316     cr.insn_count = ts->insn_count - tse->insn_count;
0317     cr.cyc_count = ts->cyc_count - tse->cyc_count;
0318     cr.db_id = tse->db_id;
0319     cr.call_ref = tse->ref;
0320     cr.return_ref = ref;
0321     if (tse->no_call)
0322         cr.flags |= CALL_RETURN_NO_CALL;
0323     if (no_return)
0324         cr.flags |= CALL_RETURN_NO_RETURN;
0325     if (tse->non_call)
0326         cr.flags |= CALL_RETURN_NON_CALL;
0327 
0328     /*
0329      * The parent db_id must be assigned before exporting the child. Note
0330      * it is not possible to export the parent first because its information
0331      * is not yet complete because its 'return' has not yet been processed.
0332      */
0333     parent_db_id = idx ? &(tse - 1)->db_id : NULL;
0334 
0335     return crp->process(&cr, parent_db_id, crp->data);
0336 }
0337 
0338 static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
0339 {
0340     struct call_return_processor *crp = ts->crp;
0341     int err;
0342 
0343     if (!crp) {
0344         ts->cnt = 0;
0345         ts->br_stack_pos = 0;
0346         if (ts->br_stack_rb)
0347             ts->br_stack_rb->nr = 0;
0348         return 0;
0349     }
0350 
0351     while (ts->cnt) {
0352         err = thread_stack__call_return(thread, ts, --ts->cnt,
0353                         ts->last_time, 0, true);
0354         if (err) {
0355             pr_err("Error flushing thread stack!\n");
0356             ts->cnt = 0;
0357             return err;
0358         }
0359     }
0360 
0361     return 0;
0362 }
0363 
0364 int thread_stack__flush(struct thread *thread)
0365 {
0366     struct thread_stack *ts = thread->ts;
0367     unsigned int pos;
0368     int err = 0;
0369 
0370     if (ts) {
0371         for (pos = 0; pos < ts->arr_sz; pos++) {
0372             int ret = __thread_stack__flush(thread, ts + pos);
0373 
0374             if (ret)
0375                 err = ret;
0376         }
0377     }
0378 
0379     return err;
0380 }
0381 
0382 static void thread_stack__update_br_stack(struct thread_stack *ts, u32 flags,
0383                       u64 from_ip, u64 to_ip)
0384 {
0385     struct branch_stack *bs = ts->br_stack_rb;
0386     struct branch_entry *be;
0387 
0388     if (!ts->br_stack_pos)
0389         ts->br_stack_pos = ts->br_stack_sz;
0390 
0391     ts->br_stack_pos -= 1;
0392 
0393     be              = &bs->entries[ts->br_stack_pos];
0394     be->from        = from_ip;
0395     be->to          = to_ip;
0396     be->flags.value = 0;
0397     be->flags.abort = !!(flags & PERF_IP_FLAG_TX_ABORT);
0398     be->flags.in_tx = !!(flags & PERF_IP_FLAG_IN_TX);
0399     /* No support for mispredict */
0400     be->flags.mispred = ts->mispred_all;
0401 
0402     if (bs->nr < ts->br_stack_sz)
0403         bs->nr += 1;
0404 }
0405 
0406 int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
0407             u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack,
0408             unsigned int br_stack_sz, bool mispred_all)
0409 {
0410     struct thread_stack *ts = thread__stack(thread, cpu);
0411 
0412     if (!thread)
0413         return -EINVAL;
0414 
0415     if (!ts) {
0416         ts = thread_stack__new(thread, cpu, NULL, callstack, br_stack_sz);
0417         if (!ts) {
0418             pr_warning("Out of memory: no thread stack\n");
0419             return -ENOMEM;
0420         }
0421         ts->trace_nr = trace_nr;
0422         ts->mispred_all = mispred_all;
0423     }
0424 
0425     /*
0426      * When the trace is discontinuous, the trace_nr changes.  In that case
0427      * the stack might be completely invalid.  Better to report nothing than
0428      * to report something misleading, so flush the stack.
0429      */
0430     if (trace_nr != ts->trace_nr) {
0431         if (ts->trace_nr)
0432             __thread_stack__flush(thread, ts);
0433         ts->trace_nr = trace_nr;
0434     }
0435 
0436     if (br_stack_sz)
0437         thread_stack__update_br_stack(ts, flags, from_ip, to_ip);
0438 
0439     /*
0440      * Stop here if thread_stack__process() is in use, or not recording call
0441      * stack.
0442      */
0443     if (ts->crp || !callstack)
0444         return 0;
0445 
0446     if (flags & PERF_IP_FLAG_CALL) {
0447         u64 ret_addr;
0448 
0449         if (!to_ip)
0450             return 0;
0451         ret_addr = from_ip + insn_len;
0452         if (ret_addr == to_ip)
0453             return 0; /* Zero-length calls are excluded */
0454         return thread_stack__push(ts, ret_addr,
0455                       flags & PERF_IP_FLAG_TRACE_END);
0456     } else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
0457         /*
0458          * If the caller did not change the trace number (which would
0459          * have flushed the stack) then try to make sense of the stack.
0460          * Possibly, tracing began after returning to the current
0461          * address, so try to pop that. Also, do not expect a call made
0462          * when the trace ended, to return, so pop that.
0463          */
0464         thread_stack__pop(ts, to_ip);
0465         thread_stack__pop_trace_end(ts);
0466     } else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
0467         thread_stack__pop(ts, to_ip);
0468     }
0469 
0470     return 0;
0471 }
0472 
0473 void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
0474 {
0475     struct thread_stack *ts = thread__stack(thread, cpu);
0476 
0477     if (!ts)
0478         return;
0479 
0480     if (trace_nr != ts->trace_nr) {
0481         if (ts->trace_nr)
0482             __thread_stack__flush(thread, ts);
0483         ts->trace_nr = trace_nr;
0484     }
0485 }
0486 
0487 static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
0488 {
0489     __thread_stack__flush(thread, ts);
0490     zfree(&ts->stack);
0491     zfree(&ts->br_stack_rb);
0492 }
0493 
0494 static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
0495 {
0496     unsigned int arr_sz = ts->arr_sz;
0497 
0498     __thread_stack__free(thread, ts);
0499     memset(ts, 0, sizeof(*ts));
0500     ts->arr_sz = arr_sz;
0501 }
0502 
0503 void thread_stack__free(struct thread *thread)
0504 {
0505     struct thread_stack *ts = thread->ts;
0506     unsigned int pos;
0507 
0508     if (ts) {
0509         for (pos = 0; pos < ts->arr_sz; pos++)
0510             __thread_stack__free(thread, ts + pos);
0511         zfree(&thread->ts);
0512     }
0513 }
0514 
0515 static inline u64 callchain_context(u64 ip, u64 kernel_start)
0516 {
0517     return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
0518 }
0519 
0520 void thread_stack__sample(struct thread *thread, int cpu,
0521               struct ip_callchain *chain,
0522               size_t sz, u64 ip, u64 kernel_start)
0523 {
0524     struct thread_stack *ts = thread__stack(thread, cpu);
0525     u64 context = callchain_context(ip, kernel_start);
0526     u64 last_context;
0527     size_t i, j;
0528 
0529     if (sz < 2) {
0530         chain->nr = 0;
0531         return;
0532     }
0533 
0534     chain->ips[0] = context;
0535     chain->ips[1] = ip;
0536 
0537     if (!ts) {
0538         chain->nr = 2;
0539         return;
0540     }
0541 
0542     last_context = context;
0543 
0544     for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
0545         ip = ts->stack[ts->cnt - j].ret_addr;
0546         context = callchain_context(ip, kernel_start);
0547         if (context != last_context) {
0548             if (i >= sz - 1)
0549                 break;
0550             chain->ips[i++] = context;
0551             last_context = context;
0552         }
0553         chain->ips[i] = ip;
0554     }
0555 
0556     chain->nr = i;
0557 }
0558 
0559 /*
0560  * Hardware sample records, created some time after the event occurred, need to
0561  * have subsequent addresses removed from the call chain.
0562  */
0563 void thread_stack__sample_late(struct thread *thread, int cpu,
0564                    struct ip_callchain *chain, size_t sz,
0565                    u64 sample_ip, u64 kernel_start)
0566 {
0567     struct thread_stack *ts = thread__stack(thread, cpu);
0568     u64 sample_context = callchain_context(sample_ip, kernel_start);
0569     u64 last_context, context, ip;
0570     size_t nr = 0, j;
0571 
0572     if (sz < 2) {
0573         chain->nr = 0;
0574         return;
0575     }
0576 
0577     if (!ts)
0578         goto out;
0579 
0580     /*
0581      * When tracing kernel space, kernel addresses occur at the top of the
0582      * call chain after the event occurred but before tracing stopped.
0583      * Skip them.
0584      */
0585     for (j = 1; j <= ts->cnt; j++) {
0586         ip = ts->stack[ts->cnt - j].ret_addr;
0587         context = callchain_context(ip, kernel_start);
0588         if (context == PERF_CONTEXT_USER ||
0589             (context == sample_context && ip == sample_ip))
0590             break;
0591     }
0592 
0593     last_context = sample_ip; /* Use sample_ip as an invalid context */
0594 
0595     for (; nr < sz && j <= ts->cnt; nr++, j++) {
0596         ip = ts->stack[ts->cnt - j].ret_addr;
0597         context = callchain_context(ip, kernel_start);
0598         if (context != last_context) {
0599             if (nr >= sz - 1)
0600                 break;
0601             chain->ips[nr++] = context;
0602             last_context = context;
0603         }
0604         chain->ips[nr] = ip;
0605     }
0606 out:
0607     if (nr) {
0608         chain->nr = nr;
0609     } else {
0610         chain->ips[0] = sample_context;
0611         chain->ips[1] = sample_ip;
0612         chain->nr = 2;
0613     }
0614 }
0615 
0616 void thread_stack__br_sample(struct thread *thread, int cpu,
0617                  struct branch_stack *dst, unsigned int sz)
0618 {
0619     struct thread_stack *ts = thread__stack(thread, cpu);
0620     const size_t bsz = sizeof(struct branch_entry);
0621     struct branch_stack *src;
0622     struct branch_entry *be;
0623     unsigned int nr;
0624 
0625     dst->nr = 0;
0626 
0627     if (!ts)
0628         return;
0629 
0630     src = ts->br_stack_rb;
0631     if (!src->nr)
0632         return;
0633 
0634     dst->nr = min((unsigned int)src->nr, sz);
0635 
0636     be = &dst->entries[0];
0637     nr = min(ts->br_stack_sz - ts->br_stack_pos, (unsigned int)dst->nr);
0638     memcpy(be, &src->entries[ts->br_stack_pos], bsz * nr);
0639 
0640     if (src->nr >= ts->br_stack_sz) {
0641         sz -= nr;
0642         be = &dst->entries[nr];
0643         nr = min(ts->br_stack_pos, sz);
0644         memcpy(be, &src->entries[0], bsz * ts->br_stack_pos);
0645     }
0646 }
0647 
0648 /* Start of user space branch entries */
0649 static bool us_start(struct branch_entry *be, u64 kernel_start, bool *start)
0650 {
0651     if (!*start)
0652         *start = be->to && be->to < kernel_start;
0653 
0654     return *start;
0655 }
0656 
0657 /*
0658  * Start of branch entries after the ip fell in between 2 branches, or user
0659  * space branch entries.
0660  */
0661 static bool ks_start(struct branch_entry *be, u64 sample_ip, u64 kernel_start,
0662              bool *start, struct branch_entry *nb)
0663 {
0664     if (!*start) {
0665         *start = (nb && sample_ip >= be->to && sample_ip <= nb->from) ||
0666              be->from < kernel_start ||
0667              (be->to && be->to < kernel_start);
0668     }
0669 
0670     return *start;
0671 }
0672 
0673 /*
0674  * Hardware sample records, created some time after the event occurred, need to
0675  * have subsequent addresses removed from the branch stack.
0676  */
0677 void thread_stack__br_sample_late(struct thread *thread, int cpu,
0678                   struct branch_stack *dst, unsigned int sz,
0679                   u64 ip, u64 kernel_start)
0680 {
0681     struct thread_stack *ts = thread__stack(thread, cpu);
0682     struct branch_entry *d, *s, *spos, *ssz;
0683     struct branch_stack *src;
0684     unsigned int nr = 0;
0685     bool start = false;
0686 
0687     dst->nr = 0;
0688 
0689     if (!ts)
0690         return;
0691 
0692     src = ts->br_stack_rb;
0693     if (!src->nr)
0694         return;
0695 
0696     spos = &src->entries[ts->br_stack_pos];
0697     ssz  = &src->entries[ts->br_stack_sz];
0698 
0699     d = &dst->entries[0];
0700     s = spos;
0701 
0702     if (ip < kernel_start) {
0703         /*
0704          * User space sample: start copying branch entries when the
0705          * branch is in user space.
0706          */
0707         for (s = spos; s < ssz && nr < sz; s++) {
0708             if (us_start(s, kernel_start, &start)) {
0709                 *d++ = *s;
0710                 nr += 1;
0711             }
0712         }
0713 
0714         if (src->nr >= ts->br_stack_sz) {
0715             for (s = &src->entries[0]; s < spos && nr < sz; s++) {
0716                 if (us_start(s, kernel_start, &start)) {
0717                     *d++ = *s;
0718                     nr += 1;
0719                 }
0720             }
0721         }
0722     } else {
0723         struct branch_entry *nb = NULL;
0724 
0725         /*
0726          * Kernel space sample: start copying branch entries when the ip
0727          * falls in between 2 branches (or the branch is in user space
0728          * because then the start must have been missed).
0729          */
0730         for (s = spos; s < ssz && nr < sz; s++) {
0731             if (ks_start(s, ip, kernel_start, &start, nb)) {
0732                 *d++ = *s;
0733                 nr += 1;
0734             }
0735             nb = s;
0736         }
0737 
0738         if (src->nr >= ts->br_stack_sz) {
0739             for (s = &src->entries[0]; s < spos && nr < sz; s++) {
0740                 if (ks_start(s, ip, kernel_start, &start, nb)) {
0741                     *d++ = *s;
0742                     nr += 1;
0743                 }
0744                 nb = s;
0745             }
0746         }
0747     }
0748 
0749     dst->nr = nr;
0750 }
0751 
0752 struct call_return_processor *
0753 call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data),
0754                void *data)
0755 {
0756     struct call_return_processor *crp;
0757 
0758     crp = zalloc(sizeof(struct call_return_processor));
0759     if (!crp)
0760         return NULL;
0761     crp->cpr = call_path_root__new();
0762     if (!crp->cpr)
0763         goto out_free;
0764     crp->process = process;
0765     crp->data = data;
0766     return crp;
0767 
0768 out_free:
0769     free(crp);
0770     return NULL;
0771 }
0772 
0773 void call_return_processor__free(struct call_return_processor *crp)
0774 {
0775     if (crp) {
0776         call_path_root__free(crp->cpr);
0777         free(crp);
0778     }
0779 }
0780 
0781 static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
0782                  u64 timestamp, u64 ref, struct call_path *cp,
0783                  bool no_call, bool trace_end)
0784 {
0785     struct thread_stack_entry *tse;
0786     int err;
0787 
0788     if (!cp)
0789         return -ENOMEM;
0790 
0791     if (ts->cnt == ts->sz) {
0792         err = thread_stack__grow(ts);
0793         if (err)
0794             return err;
0795     }
0796 
0797     tse = &ts->stack[ts->cnt++];
0798     tse->ret_addr = ret_addr;
0799     tse->timestamp = timestamp;
0800     tse->ref = ref;
0801     tse->branch_count = ts->branch_count;
0802     tse->insn_count = ts->insn_count;
0803     tse->cyc_count = ts->cyc_count;
0804     tse->cp = cp;
0805     tse->no_call = no_call;
0806     tse->trace_end = trace_end;
0807     tse->non_call = false;
0808     tse->db_id = 0;
0809 
0810     return 0;
0811 }
0812 
0813 static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
0814                 u64 ret_addr, u64 timestamp, u64 ref,
0815                 struct symbol *sym)
0816 {
0817     int err;
0818 
0819     if (!ts->cnt)
0820         return 1;
0821 
0822     if (ts->cnt == 1) {
0823         struct thread_stack_entry *tse = &ts->stack[0];
0824 
0825         if (tse->cp->sym == sym)
0826             return thread_stack__call_return(thread, ts, --ts->cnt,
0827                              timestamp, ref, false);
0828     }
0829 
0830     if (ts->stack[ts->cnt - 1].ret_addr == ret_addr &&
0831         !ts->stack[ts->cnt - 1].non_call) {
0832         return thread_stack__call_return(thread, ts, --ts->cnt,
0833                          timestamp, ref, false);
0834     } else {
0835         size_t i = ts->cnt - 1;
0836 
0837         while (i--) {
0838             if (ts->stack[i].ret_addr != ret_addr ||
0839                 ts->stack[i].non_call)
0840                 continue;
0841             i += 1;
0842             while (ts->cnt > i) {
0843                 err = thread_stack__call_return(thread, ts,
0844                                 --ts->cnt,
0845                                 timestamp, ref,
0846                                 true);
0847                 if (err)
0848                     return err;
0849             }
0850             return thread_stack__call_return(thread, ts, --ts->cnt,
0851                              timestamp, ref, false);
0852         }
0853     }
0854 
0855     return 1;
0856 }
0857 
0858 static int thread_stack__bottom(struct thread_stack *ts,
0859                 struct perf_sample *sample,
0860                 struct addr_location *from_al,
0861                 struct addr_location *to_al, u64 ref)
0862 {
0863     struct call_path_root *cpr = ts->crp->cpr;
0864     struct call_path *cp;
0865     struct symbol *sym;
0866     u64 ip;
0867 
0868     if (sample->ip) {
0869         ip = sample->ip;
0870         sym = from_al->sym;
0871     } else if (sample->addr) {
0872         ip = sample->addr;
0873         sym = to_al->sym;
0874     } else {
0875         return 0;
0876     }
0877 
0878     cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
0879                 ts->kernel_start);
0880 
0881     return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
0882                      true, false);
0883 }
0884 
0885 static int thread_stack__pop_ks(struct thread *thread, struct thread_stack *ts,
0886                 struct perf_sample *sample, u64 ref)
0887 {
0888     u64 tm = sample->time;
0889     int err;
0890 
0891     /* Return to userspace, so pop all kernel addresses */
0892     while (thread_stack__in_kernel(ts)) {
0893         err = thread_stack__call_return(thread, ts, --ts->cnt,
0894                         tm, ref, true);
0895         if (err)
0896             return err;
0897     }
0898 
0899     return 0;
0900 }
0901 
0902 static int thread_stack__no_call_return(struct thread *thread,
0903                     struct thread_stack *ts,
0904                     struct perf_sample *sample,
0905                     struct addr_location *from_al,
0906                     struct addr_location *to_al, u64 ref)
0907 {
0908     struct call_path_root *cpr = ts->crp->cpr;
0909     struct call_path *root = &cpr->call_path;
0910     struct symbol *fsym = from_al->sym;
0911     struct symbol *tsym = to_al->sym;
0912     struct call_path *cp, *parent;
0913     u64 ks = ts->kernel_start;
0914     u64 addr = sample->addr;
0915     u64 tm = sample->time;
0916     u64 ip = sample->ip;
0917     int err;
0918 
0919     if (ip >= ks && addr < ks) {
0920         /* Return to userspace, so pop all kernel addresses */
0921         err = thread_stack__pop_ks(thread, ts, sample, ref);
0922         if (err)
0923             return err;
0924 
0925         /* If the stack is empty, push the userspace address */
0926         if (!ts->cnt) {
0927             cp = call_path__findnew(cpr, root, tsym, addr, ks);
0928             return thread_stack__push_cp(ts, 0, tm, ref, cp, true,
0929                              false);
0930         }
0931     } else if (thread_stack__in_kernel(ts) && ip < ks) {
0932         /* Return to userspace, so pop all kernel addresses */
0933         err = thread_stack__pop_ks(thread, ts, sample, ref);
0934         if (err)
0935             return err;
0936     }
0937 
0938     if (ts->cnt)
0939         parent = ts->stack[ts->cnt - 1].cp;
0940     else
0941         parent = root;
0942 
0943     if (parent->sym == from_al->sym) {
0944         /*
0945          * At the bottom of the stack, assume the missing 'call' was
0946          * before the trace started. So, pop the current symbol and push
0947          * the 'to' symbol.
0948          */
0949         if (ts->cnt == 1) {
0950             err = thread_stack__call_return(thread, ts, --ts->cnt,
0951                             tm, ref, false);
0952             if (err)
0953                 return err;
0954         }
0955 
0956         if (!ts->cnt) {
0957             cp = call_path__findnew(cpr, root, tsym, addr, ks);
0958 
0959             return thread_stack__push_cp(ts, addr, tm, ref, cp,
0960                              true, false);
0961         }
0962 
0963         /*
0964          * Otherwise assume the 'return' is being used as a jump (e.g.
0965          * retpoline) and just push the 'to' symbol.
0966          */
0967         cp = call_path__findnew(cpr, parent, tsym, addr, ks);
0968 
0969         err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false);
0970         if (!err)
0971             ts->stack[ts->cnt - 1].non_call = true;
0972 
0973         return err;
0974     }
0975 
0976     /*
0977      * Assume 'parent' has not yet returned, so push 'to', and then push and
0978      * pop 'from'.
0979      */
0980 
0981     cp = call_path__findnew(cpr, parent, tsym, addr, ks);
0982 
0983     err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false);
0984     if (err)
0985         return err;
0986 
0987     cp = call_path__findnew(cpr, cp, fsym, ip, ks);
0988 
0989     err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false);
0990     if (err)
0991         return err;
0992 
0993     return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false);
0994 }
0995 
0996 static int thread_stack__trace_begin(struct thread *thread,
0997                      struct thread_stack *ts, u64 timestamp,
0998                      u64 ref)
0999 {
1000     struct thread_stack_entry *tse;
1001     int err;
1002 
1003     if (!ts->cnt)
1004         return 0;
1005 
1006     /* Pop trace end */
1007     tse = &ts->stack[ts->cnt - 1];
1008     if (tse->trace_end) {
1009         err = thread_stack__call_return(thread, ts, --ts->cnt,
1010                         timestamp, ref, false);
1011         if (err)
1012             return err;
1013     }
1014 
1015     return 0;
1016 }
1017 
1018 static int thread_stack__trace_end(struct thread_stack *ts,
1019                    struct perf_sample *sample, u64 ref)
1020 {
1021     struct call_path_root *cpr = ts->crp->cpr;
1022     struct call_path *cp;
1023     u64 ret_addr;
1024 
1025     /* No point having 'trace end' on the bottom of the stack */
1026     if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
1027         return 0;
1028 
1029     cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
1030                 ts->kernel_start);
1031 
1032     ret_addr = sample->ip + sample->insn_len;
1033 
1034     return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
1035                      false, true);
1036 }
1037 
1038 static bool is_x86_retpoline(const char *name)
1039 {
1040     const char *p = strstr(name, "__x86_indirect_thunk_");
1041 
1042     return p == name || !strcmp(name, "__indirect_thunk_start");
1043 }
1044 
1045 /*
1046  * x86 retpoline functions pollute the call graph. This function removes them.
1047  * This does not handle function return thunks, nor is there any improvement
1048  * for the handling of inline thunks or extern thunks.
1049  */
1050 static int thread_stack__x86_retpoline(struct thread_stack *ts,
1051                        struct perf_sample *sample,
1052                        struct addr_location *to_al)
1053 {
1054     struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1];
1055     struct call_path_root *cpr = ts->crp->cpr;
1056     struct symbol *sym = tse->cp->sym;
1057     struct symbol *tsym = to_al->sym;
1058     struct call_path *cp;
1059 
1060     if (sym && is_x86_retpoline(sym->name)) {
1061         /*
1062          * This is a x86 retpoline fn. It pollutes the call graph by
1063          * showing up everywhere there is an indirect branch, but does
1064          * not itself mean anything. Here the top-of-stack is removed,
1065          * by decrementing the stack count, and then further down, the
1066          * resulting top-of-stack is replaced with the actual target.
1067          * The result is that the retpoline functions will no longer
1068          * appear in the call graph. Note this only affects the call
1069          * graph, since all the original branches are left unchanged.
1070          */
1071         ts->cnt -= 1;
1072         sym = ts->stack[ts->cnt - 2].cp->sym;
1073         if (sym && sym == tsym && to_al->addr != tsym->start) {
1074             /*
1075              * Target is back to the middle of the symbol we came
1076              * from so assume it is an indirect jmp and forget it
1077              * altogether.
1078              */
1079             ts->cnt -= 1;
1080             return 0;
1081         }
1082     } else if (sym && sym == tsym) {
1083         /*
1084          * Target is back to the symbol we came from so assume it is an
1085          * indirect jmp and forget it altogether.
1086          */
1087         ts->cnt -= 1;
1088         return 0;
1089     }
1090 
1091     cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym,
1092                 sample->addr, ts->kernel_start);
1093     if (!cp)
1094         return -ENOMEM;
1095 
1096     /* Replace the top-of-stack with the actual target */
1097     ts->stack[ts->cnt - 1].cp = cp;
1098 
1099     return 0;
1100 }
1101 
1102 int thread_stack__process(struct thread *thread, struct comm *comm,
1103               struct perf_sample *sample,
1104               struct addr_location *from_al,
1105               struct addr_location *to_al, u64 ref,
1106               struct call_return_processor *crp)
1107 {
1108     struct thread_stack *ts = thread__stack(thread, sample->cpu);
1109     enum retpoline_state_t rstate;
1110     int err = 0;
1111 
1112     if (ts && !ts->crp) {
1113         /* Supersede thread_stack__event() */
1114         thread_stack__reset(thread, ts);
1115         ts = NULL;
1116     }
1117 
1118     if (!ts) {
1119         ts = thread_stack__new(thread, sample->cpu, crp, true, 0);
1120         if (!ts)
1121             return -ENOMEM;
1122         ts->comm = comm;
1123     }
1124 
1125     rstate = ts->rstate;
1126     if (rstate == X86_RETPOLINE_DETECTED)
1127         ts->rstate = X86_RETPOLINE_POSSIBLE;
1128 
1129     /* Flush stack on exec */
1130     if (ts->comm != comm && thread->pid_ == thread->tid) {
1131         err = __thread_stack__flush(thread, ts);
1132         if (err)
1133             return err;
1134         ts->comm = comm;
1135     }
1136 
1137     /* If the stack is empty, put the current symbol on the stack */
1138     if (!ts->cnt) {
1139         err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
1140         if (err)
1141             return err;
1142     }
1143 
1144     ts->branch_count += 1;
1145     ts->insn_count += sample->insn_cnt;
1146     ts->cyc_count += sample->cyc_cnt;
1147     ts->last_time = sample->time;
1148 
1149     if (sample->flags & PERF_IP_FLAG_CALL) {
1150         bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
1151         struct call_path_root *cpr = ts->crp->cpr;
1152         struct call_path *cp;
1153         u64 ret_addr;
1154 
1155         if (!sample->ip || !sample->addr)
1156             return 0;
1157 
1158         ret_addr = sample->ip + sample->insn_len;
1159         if (ret_addr == sample->addr)
1160             return 0; /* Zero-length calls are excluded */
1161 
1162         cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
1163                     to_al->sym, sample->addr,
1164                     ts->kernel_start);
1165         err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
1166                         cp, false, trace_end);
1167 
1168         /*
1169          * A call to the same symbol but not the start of the symbol,
1170          * may be the start of a x86 retpoline.
1171          */
1172         if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym &&
1173             from_al->sym == to_al->sym &&
1174             to_al->addr != to_al->sym->start)
1175             ts->rstate = X86_RETPOLINE_DETECTED;
1176 
1177     } else if (sample->flags & PERF_IP_FLAG_RETURN) {
1178         if (!sample->addr) {
1179             u32 return_from_kernel = PERF_IP_FLAG_SYSCALLRET |
1180                          PERF_IP_FLAG_INTERRUPT;
1181 
1182             if (!(sample->flags & return_from_kernel))
1183                 return 0;
1184 
1185             /* Pop kernel stack */
1186             return thread_stack__pop_ks(thread, ts, sample, ref);
1187         }
1188 
1189         if (!sample->ip)
1190             return 0;
1191 
1192         /* x86 retpoline 'return' doesn't match the stack */
1193         if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 &&
1194             ts->stack[ts->cnt - 1].ret_addr != sample->addr)
1195             return thread_stack__x86_retpoline(ts, sample, to_al);
1196 
1197         err = thread_stack__pop_cp(thread, ts, sample->addr,
1198                        sample->time, ref, from_al->sym);
1199         if (err) {
1200             if (err < 0)
1201                 return err;
1202             err = thread_stack__no_call_return(thread, ts, sample,
1203                                from_al, to_al, ref);
1204         }
1205     } else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
1206         err = thread_stack__trace_begin(thread, ts, sample->time, ref);
1207     } else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
1208         err = thread_stack__trace_end(ts, sample, ref);
1209     } else if (sample->flags & PERF_IP_FLAG_BRANCH &&
1210            from_al->sym != to_al->sym && to_al->sym &&
1211            to_al->addr == to_al->sym->start) {
1212         struct call_path_root *cpr = ts->crp->cpr;
1213         struct call_path *cp;
1214 
1215         /*
1216          * The compiler might optimize a call/ret combination by making
1217          * it a jmp. Make that visible by recording on the stack a
1218          * branch to the start of a different symbol. Note, that means
1219          * when a ret pops the stack, all jmps must be popped off first.
1220          */
1221         cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
1222                     to_al->sym, sample->addr,
1223                     ts->kernel_start);
1224         err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false,
1225                         false);
1226         if (!err)
1227             ts->stack[ts->cnt - 1].non_call = true;
1228     }
1229 
1230     return err;
1231 }
1232 
1233 size_t thread_stack__depth(struct thread *thread, int cpu)
1234 {
1235     struct thread_stack *ts = thread__stack(thread, cpu);
1236 
1237     if (!ts)
1238         return 0;
1239     return ts->cnt;
1240 }