// SPDX-License-Identifier: GPL-2.0
/*
 * Infrastructure to hook into function calls and returns.
 * Copyright (c) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com>
 * Mostly borrowed from function tracer which
 * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
 *
 * Highly modified by Steven Rostedt (VMware).
 */
#include <linux/jump_label.h>
#include <linux/suspend.h>
#include <linux/ftrace.h>
#include <linux/slab.h>

#include <trace/events/sched.h>

#include "ftrace_internal.h"

#ifdef CONFIG_DYNAMIC_FTRACE
#define ASSIGN_OPS_HASH(opsname, val) \
    .func_hash      = val, \
    .local_hash.regex_lock  = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock),
#else
#define ASSIGN_OPS_HASH(opsname, val)
#endif

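/*
 * kill_ftrace_graph is set by ftrace_graph_stop() when a fatal problem
 * is detected; once set, graph tracing stays permanently disabled.
 * ftrace_graph_active is nonzero while a graph tracer is registered.
 */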
DEFINE_STATIC_KEY_FALSE(kill_ftrace_graph);
int ftrace_graph_active;

/* Enabled by default (can be cleared by function_graph tracer flags) */
static bool fgraph_sleep_time = true;

#ifdef CONFIG_DYNAMIC_FTRACE
/*
 * archs can override this function if they must do something
 * to enable the hook for the graph tracer.
 */
int __weak ftrace_enable_ftrace_graph_caller(void)
{
    return 0;
}

/*
 * archs can override this function if they must do something
 * to disable the hook for the graph tracer.
 */
int __weak ftrace_disable_ftrace_graph_caller(void)
{
    return 0;
}
#endif

/**
 * ftrace_graph_stop - set to permanently disable function graph tracing
 *
 * In case of an error in function graph tracing, this is called
 * to try to keep function graph tracing from causing any more harm.
 * Usually this is pretty severe and this is called to try to at least
 * get a warning out to the user.
 */
void ftrace_graph_stop(void)
{
    static_branch_enable(&kill_ftrace_graph);
}

/* Add a function return address to the trace stack on thread info. */
static int
ftrace_push_return_trace(unsigned long ret, unsigned long func,
             unsigned long frame_pointer, unsigned long *retp)
{
    unsigned long long calltime;
    int index;

    if (unlikely(ftrace_graph_is_dead()))
        return -EBUSY;

    if (!current->ret_stack)
        return -EBUSY;

    /*
     * We must make sure the ret_stack is tested before we read
     * anything else.
     */
    smp_rmb();

    /* The return trace stack is full */
    if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
        atomic_inc(&current->trace_overrun);
        return -EBUSY;
    }

    calltime = trace_clock_local();

    index = ++current->curr_ret_stack;
    barrier();
    current->ret_stack[index].ret = ret;
    current->ret_stack[index].func = func;
    current->ret_stack[index].calltime = calltime;
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
    current->ret_stack[index].fp = frame_pointer;
#endif
#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
    current->ret_stack[index].retp = retp;
#endif
    return 0;
}

/*
 * Not all archs define MCOUNT_INSN_SIZE which is used to look for direct
 * functions. But those archs currently don't support direct functions
 * anyway, and ftrace_find_rec_direct() is just a stub for them.
 * Define MCOUNT_INSN_SIZE to keep those archs compiling.
 */
#ifndef MCOUNT_INSN_SIZE
/* Make sure this only works without direct calls */
# ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
#  error MCOUNT_INSN_SIZE not defined with direct calls enabled
# endif
# define MCOUNT_INSN_SIZE 0
#endif

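/*
 * Record a function entry on the current task's shadow return stack and
 * invoke the entry callback. On failure the stack and depth updates are
 * undone and -EBUSY is returned.
 */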
int function_graph_enter(unsigned long ret, unsigned long func,
             unsigned long frame_pointer, unsigned long *retp)
{
    struct ftrace_graph_ent trace;

#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
    /*
     * Skip graph tracing if the return location is served by a direct
     * trampoline, since the call sequence and return addresses are
     * unpredictable anyway.
     * Ex: a BPF trampoline may call the original function and may skip
     * frames depending on the type of BPF programs attached.
     */
    if (ftrace_direct_func_count &&
        ftrace_find_rec_direct(ret - MCOUNT_INSN_SIZE))
        return -EBUSY;
#endif
    trace.func = func;
    trace.depth = ++current->curr_ret_depth;

    if (ftrace_push_return_trace(ret, func, frame_pointer, retp))
        goto out;

    /* Only trace if the calling function expects to */
    if (!ftrace_graph_entry(&trace))
        goto out_ret;

    return 0;
 out_ret:
    current->curr_ret_stack--;
 out:
    current->curr_ret_depth--;
    return -EBUSY;
}

/* Retrieve a function return address from the trace stack on thread info. */
static void
ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
            unsigned long frame_pointer)
{
    int index;

    index = current->curr_ret_stack;

    if (unlikely(index < 0 || index >= FTRACE_RETFUNC_DEPTH)) {
        ftrace_graph_stop();
        WARN_ON(1);
        /* Might as well panic, otherwise we have nowhere to go */
        *ret = (unsigned long)panic;
        return;
    }

#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
    /*
     * The arch may choose to record the frame pointer used
     * and check it here to make sure that it is what we expect it
     * to be. If gcc does not set the placeholder of the return
     * address in the frame pointer, and does a copy instead, then
     * the function graph trace will fail. This test detects this
     * case.
     *
     * Currently, x86_32 with optimization for size (-Os) makes the
     * latest gcc do the above.
     *
     * Note, -mfentry does not use frame pointers, and this test
     * is not needed if CC_USING_FENTRY is set.
     */
    if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
        ftrace_graph_stop();
        WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
             "  from func %ps return to %lx\n",
             current->ret_stack[index].fp,
             frame_pointer,
             (void *)current->ret_stack[index].func,
             current->ret_stack[index].ret);
        *ret = (unsigned long)panic;
        return;
    }
#endif

    *ret = current->ret_stack[index].ret;
    trace->func = current->ret_stack[index].func;
    trace->calltime = current->ret_stack[index].calltime;
    trace->overrun = atomic_read(&current->trace_overrun);
    trace->depth = current->curr_ret_depth--;
    /*
     * We still want to trace interrupts coming in if
     * max_depth is set to 1. Make sure the decrement is
     * seen before ftrace_graph_return.
     */
    barrier();
}

/*
 * Hibernation protection.
 * The state of the current task is too unstable during
 * suspend/restore to disk. We want to protect against that.
 */
static int
ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,
                            void *unused)
{
    switch (state) {
    case PM_HIBERNATION_PREPARE:
        pause_graph_tracing();
        break;

    case PM_POST_HIBERNATION:
        unpause_graph_tracing();
        break;
    }
    return NOTIFY_DONE;
}

static struct notifier_block ftrace_suspend_notifier = {
    .notifier_call = ftrace_suspend_notifier_call,
};

/*
 * Send the trace to the ring-buffer.
 * @return the original return address.
 */
unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
{
    struct ftrace_graph_ret trace;
    unsigned long ret;

    ftrace_pop_return_trace(&trace, &ret, frame_pointer);
    trace.rettime = trace_clock_local();
    ftrace_graph_return(&trace);
    /*
     * The ftrace_graph_return() may still access the current
     * ret_stack structure; we need to make sure the update of
     * curr_ret_stack is after that.
     */
    barrier();
    current->curr_ret_stack--;

    if (unlikely(!ret)) {
        ftrace_graph_stop();
        WARN_ON(1);
        /* Might as well panic. What else to do? */
        ret = (unsigned long)panic;
    }

    return ret;
}

/**
 * ftrace_graph_get_ret_stack - return the entry of the shadow stack
 * @task: The task to read the shadow stack from
 * @idx: Index down the shadow stack
 *
 * Return the ret_stack entry on the shadow stack of the @task at the
 * call graph at @idx starting with zero. If @idx is zero, it
 * will return the last saved ret_stack entry. If it is greater than
 * zero, it will return the corresponding ret_stack for the depth
 * of saved return addresses.
 */
struct ftrace_ret_stack *
ftrace_graph_get_ret_stack(struct task_struct *task, int idx)
{
    idx = task->curr_ret_stack - idx;

    if (idx >= 0 && idx <= task->curr_ret_stack)
        return &task->ret_stack[idx];

    return NULL;
}

/**
 * ftrace_graph_ret_addr - convert a potentially modified stack return address
 *             to its original value
 *
 * This function can be called by stack unwinding code to convert a found stack
 * return address ('ret') to its original value, in case the function graph
 * tracer has modified it to be 'return_to_handler'.  If the address hasn't
 * been modified, the unchanged value of 'ret' is returned.
 *
 * 'idx' is a state variable which should be initialized by the caller to zero
 * before the first call.
 *
 * 'retp' is a pointer to the return address on the stack.  It's ignored if
 * the arch doesn't have HAVE_FUNCTION_GRAPH_RET_ADDR_PTR defined.
 */
#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
                    unsigned long ret, unsigned long *retp)
{
    int index = task->curr_ret_stack;
    int i;

    if (ret != (unsigned long)dereference_kernel_function_descriptor(return_to_handler))
        return ret;

    if (index < 0)
        return ret;

    for (i = 0; i <= index; i++)
        if (task->ret_stack[i].retp == retp)
            return task->ret_stack[i].ret;

    return ret;
}
#else /* !HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
                    unsigned long ret, unsigned long *retp)
{
    int task_idx;

    if (ret != (unsigned long)dereference_kernel_function_descriptor(return_to_handler))
        return ret;

    task_idx = task->curr_ret_stack;

    if (!task->ret_stack || task_idx < *idx)
        return ret;

    task_idx -= *idx;
    (*idx)++;

    return task->ret_stack[task_idx].ret;
}
#endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
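/*
 * Typical use from stack unwinding code (a sketch, not code from this
 * file): initialize a state variable to zero and filter every address
 * read off the stack through ftrace_graph_ret_addr():
 *
 *    int graph_idx = 0;
 *    ...
 *    addr = ftrace_graph_ret_addr(task, &graph_idx, addr, addr_ptr);
 *
 * so that any 'return_to_handler' entry is mapped back to the real
 * return address.
 */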
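/*
 * The ftrace_ops used to hook the graph tracer into the function tracer.
 * It shares its filter hash with global_ops, so set_ftrace_filter and
 * set_ftrace_notrace apply to the graph tracer as well.
 */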
static struct ftrace_ops graph_ops = {
    .func           = ftrace_graph_func,
    .flags          = FTRACE_OPS_FL_INITIALIZED |
                   FTRACE_OPS_FL_PID |
                   FTRACE_OPS_GRAPH_STUB,
#ifdef FTRACE_GRAPH_TRAMP_ADDR
    .trampoline     = FTRACE_GRAPH_TRAMP_ADDR,
    /* trampoline_size is only needed for dynamically allocated tramps */
#endif
    ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash)
};

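/* Control whether time spent sleeping is included in function timings. */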
void ftrace_graph_sleep_time_control(bool enable)
{
    fgraph_sleep_time = enable;
}

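/* Stub entry callback: returning 0 tells function_graph_enter() not to trace. */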
int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
{
    return 0;
}

/*
 * Simply points to ftrace_stub, but with the proper protocol.
 * Defined by the linker script in linux/vmlinux.lds.h
 */
extern void ftrace_stub_graph(struct ftrace_graph_ret *);

/* The callbacks that hook a function */
trace_func_graph_ret_t ftrace_graph_return = ftrace_stub_graph;
trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub;

/* Try to assign a return stack array to FTRACE_RETSTACK_ALLOC_SIZE tasks. */
static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
{
    int i;
    int ret = 0;
    int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE;
    struct task_struct *g, *t;

    for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) {
        ret_stack_list[i] =
            kmalloc_array(FTRACE_RETFUNC_DEPTH,
                      sizeof(struct ftrace_ret_stack),
                      GFP_KERNEL);
        if (!ret_stack_list[i]) {
            start = 0;
            end = i;
            ret = -ENOMEM;
            goto free;
        }
    }

    rcu_read_lock();
    for_each_process_thread(g, t) {
        if (start == end) {
            ret = -EAGAIN;
            goto unlock;
        }

        if (t->ret_stack == NULL) {
            atomic_set(&t->trace_overrun, 0);
            t->curr_ret_stack = -1;
            t->curr_ret_depth = -1;
            /* Make sure the tasks see the -1 first: */
            smp_wmb();
            t->ret_stack = ret_stack_list[start++];
        }
    }

unlock:
    rcu_read_unlock();
free:
    for (i = start; i < end; i++)
        kfree(ret_stack_list[i]);
    return ret;
}

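/*
 * sched_switch tracepoint probe. When sleep time is not being counted,
 * add the time the task spent scheduled out to every calltime on its
 * shadow stack, so that sleeping does not inflate the reported durations.
 */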
static void
ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
                struct task_struct *prev,
                struct task_struct *next,
                unsigned int prev_state)
{
    unsigned long long timestamp;
    int index;

    /*
     * Does the user want to count the time a function was asleep?
     * If so, do not update the time stamps.
     */
    if (fgraph_sleep_time)
        return;

    timestamp = trace_clock_local();

    prev->ftrace_timestamp = timestamp;

    /* only process tasks that we timestamped */
    if (!next->ftrace_timestamp)
        return;

    /*
     * Update all the counters in next to make up for the
     * time next was sleeping.
     */
    timestamp -= next->ftrace_timestamp;

    for (index = next->curr_ret_stack; index >= 0; index--)
        next->ret_stack[index].calltime += timestamp;
}

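/*
 * Entry callback used while other ftrace_ops are registered: only call
 * the real entry callback if the function matches the global_ops filters.
 */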
static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace)
{
    if (!ftrace_ops_test(&global_ops, trace->func, NULL))
        return 0;
    return __ftrace_graph_entry(trace);
}

/*
 * The function graph tracer should only trace the functions defined
 * by set_ftrace_filter and set_ftrace_notrace. If another function
 * tracer ops is registered, the graph tracer must test the
 * function against the global ops, and not just trace any function
 * that any ftrace_ops has registered.
 */
void update_function_graph_func(void)
{
    struct ftrace_ops *op;
    bool do_test = false;

    /*
     * The graph and global ops share the same set of functions
     * to test. If any other ops is on the list, then
     * the graph tracing needs to test if it's the function
     * it should call.
     */
    do_for_each_ftrace_op(op, ftrace_ops_list) {
        if (op != &global_ops && op != &graph_ops &&
            op != &ftrace_list_end) {
            do_test = true;
            /* in double loop, break out with goto */
            goto out;
        }
    } while_for_each_ftrace_op(op);
 out:
    if (do_test)
        ftrace_graph_entry = ftrace_graph_entry_test;
    else
        ftrace_graph_entry = __ftrace_graph_entry;
}

static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack);

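/* Attach a freshly allocated shadow stack to @t and reset its bookkeeping. */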
static void
graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
{
    atomic_set(&t->trace_overrun, 0);
    t->ftrace_timestamp = 0;
    /* make curr_ret_stack visible before we add the ret_stack */
    smp_wmb();
    t->ret_stack = ret_stack;
}

/*
 * Allocate a return stack for the idle task. May be the first
 * time through, or it may be done by CPU hotplug online.
 */
void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
{
    t->curr_ret_stack = -1;
    t->curr_ret_depth = -1;
    /*
     * The idle task has no parent, it either has its own
     * stack or no stack at all.
     */
    if (t->ret_stack)
        WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu));

    if (ftrace_graph_active) {
        struct ftrace_ret_stack *ret_stack;

        ret_stack = per_cpu(idle_ret_stack, cpu);
        if (!ret_stack) {
            ret_stack =
                kmalloc_array(FTRACE_RETFUNC_DEPTH,
                          sizeof(struct ftrace_ret_stack),
                          GFP_KERNEL);
            if (!ret_stack)
                return;
            per_cpu(idle_ret_stack, cpu) = ret_stack;
        }
        graph_init_task(t, ret_stack);
    }
}

/* Allocate a return stack for a newly created task */
void ftrace_graph_init_task(struct task_struct *t)
{
    /* Make sure we do not use the parent ret_stack */
    t->ret_stack = NULL;
    t->curr_ret_stack = -1;
    t->curr_ret_depth = -1;

    if (ftrace_graph_active) {
        struct ftrace_ret_stack *ret_stack;

        ret_stack = kmalloc_array(FTRACE_RETFUNC_DEPTH,
                      sizeof(struct ftrace_ret_stack),
                      GFP_KERNEL);
        if (!ret_stack)
            return;
        graph_init_task(t, ret_stack);
    }
}

void ftrace_graph_exit_task(struct task_struct *t)
{
    struct ftrace_ret_stack *ret_stack = t->ret_stack;

    t->ret_stack = NULL;
    /* NULL must become visible to IRQs before we free it: */
    barrier();

    kfree(ret_stack);
}

/* Allocate a return stack for each task */
static int start_graph_tracing(void)
{
    struct ftrace_ret_stack **ret_stack_list;
    int ret, cpu;

    ret_stack_list = kmalloc_array(FTRACE_RETSTACK_ALLOC_SIZE,
                       sizeof(struct ftrace_ret_stack *),
                       GFP_KERNEL);

    if (!ret_stack_list)
        return -ENOMEM;

    /* The cpu_boot init_task->ret_stack will never be freed */
    for_each_online_cpu(cpu) {
        if (!idle_task(cpu)->ret_stack)
            ftrace_graph_init_idle_task(idle_task(cpu), cpu);
    }

    do {
        ret = alloc_retstack_tasklist(ret_stack_list);
    } while (ret == -EAGAIN);

    if (!ret) {
        ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
        if (ret)
            pr_info("ftrace_graph: Couldn't activate tracepoint"
                " probe to kernel_sched_switch\n");
    }

    kfree(ret_stack_list);
    return ret;
}

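/*
 * Register a graph tracer: allocate the shadow stacks, hook the suspend
 * notifier, install the entry and return callbacks, and start the
 * function hook. Only one graph tracer may be registered at a time.
 */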
int register_ftrace_graph(struct fgraph_ops *gops)
{
    int ret = 0;

    mutex_lock(&ftrace_lock);

    /* we currently allow only one tracer registered at a time */
    if (ftrace_graph_active) {
        ret = -EBUSY;
        goto out;
    }

    register_pm_notifier(&ftrace_suspend_notifier);

    ftrace_graph_active++;
    ret = start_graph_tracing();
    if (ret) {
        ftrace_graph_active--;
        goto out;
    }

    ftrace_graph_return = gops->retfunc;

    /*
     * Update the indirect function (__ftrace_graph_entry) to the
     * entryfunc, and point the function that actually gets called at
     * entry_test first. Then call update_function_graph_func() to
     * determine if the entryfunc should be called directly or not.
     */
    __ftrace_graph_entry = gops->entryfunc;
    ftrace_graph_entry = ftrace_graph_entry_test;
    update_function_graph_func();

    ret = ftrace_startup(&graph_ops, FTRACE_START_FUNC_RET);
out:
    mutex_unlock(&ftrace_lock);
    return ret;
}

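/* Tear down the graph tracer: restore the stub callbacks and remove all hooks. */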
void unregister_ftrace_graph(struct fgraph_ops *gops)
{
    mutex_lock(&ftrace_lock);

    if (unlikely(!ftrace_graph_active))
        goto out;

    ftrace_graph_active--;
    ftrace_graph_return = ftrace_stub_graph;
    ftrace_graph_entry = ftrace_graph_entry_stub;
    __ftrace_graph_entry = ftrace_graph_entry_stub;
    ftrace_shutdown(&graph_ops, FTRACE_STOP_FUNC_RET);
    unregister_pm_notifier(&ftrace_suspend_notifier);
    unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);

 out:
    mutex_unlock(&ftrace_lock);
}