Back to home page

LXR

 
 

    


0001 /*
0002  * Copyright (C) 2008-2014 Mathieu Desnoyers
0003  *
0004  * This program is free software; you can redistribute it and/or modify
0005  * it under the terms of the GNU General Public License as published by
0006  * the Free Software Foundation; either version 2 of the License, or
0007  * (at your option) any later version.
0008  *
0009  * This program is distributed in the hope that it will be useful,
0010  * but WITHOUT ANY WARRANTY; without even the implied warranty of
0011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0012  * GNU General Public License for more details.
0013  *
0014  * You should have received a copy of the GNU General Public License
0015  * along with this program; if not, write to the Free Software
0016  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
0017  */
0018 #include <linux/module.h>
0019 #include <linux/mutex.h>
0020 #include <linux/types.h>
0021 #include <linux/jhash.h>
0022 #include <linux/list.h>
0023 #include <linux/rcupdate.h>
0024 #include <linux/tracepoint.h>
0025 #include <linux/err.h>
0026 #include <linux/slab.h>
0027 #include <linux/sched.h>
0028 #include <linux/static_key.h>
0029 
/*
 * Section bounds provided by the linker script: the array of pointers to
 * every tracepoint compiled into the core kernel image.
 */
extern struct tracepoint * const __start___tracepoints_ptrs[];
extern struct tracepoint * const __stop___tracepoints_ptrs[];

/*
 * Set to 1 to enable tracepoint debug output (see debug_print_probes()).
 * Kept const so the debug printks are compiled out by default.
 */
static const int tracepoint_debug;
0035 
#ifdef CONFIG_MODULES
/*
 * Tracepoint module list mutex protects the local module list
 * (tracepoint_module_list below) and serializes the coming/going
 * notifier callbacks.
 */
static DEFINE_MUTEX(tracepoint_module_list_mutex);

/* Local list of struct tp_module, one entry per module with tracepoints. */
static LIST_HEAD(tracepoint_module_list);
#endif /* CONFIG_MODULES */

/*
 * tracepoints_mutex protects the builtin and module tracepoints.
 * tracepoints_mutex nests inside tracepoint_module_list_mutex.
 */
static DEFINE_MUTEX(tracepoints_mutex);
0051 
0052 /*
0053  * Note about RCU :
0054  * It is used to delay the free of multiple probes array until a quiescent
0055  * state is reached.
0056  */
0057 struct tp_probes {
0058     struct rcu_head rcu;
0059     struct tracepoint_func probes[0];
0060 };
0061 
0062 static inline void *allocate_probes(int count)
0063 {
0064     struct tp_probes *p  = kmalloc(count * sizeof(struct tracepoint_func)
0065             + sizeof(struct tp_probes), GFP_KERNEL);
0066     return p == NULL ? NULL : p->probes;
0067 }
0068 
0069 static void rcu_free_old_probes(struct rcu_head *head)
0070 {
0071     kfree(container_of(head, struct tp_probes, rcu));
0072 }
0073 
0074 static inline void release_probes(struct tracepoint_func *old)
0075 {
0076     if (old) {
0077         struct tp_probes *tp_probes = container_of(old,
0078             struct tp_probes, probes[0]);
0079         call_rcu_sched(&tp_probes->rcu, rcu_free_old_probes);
0080     }
0081 }
0082 
0083 static void debug_print_probes(struct tracepoint_func *funcs)
0084 {
0085     int i;
0086 
0087     if (!tracepoint_debug || !funcs)
0088         return;
0089 
0090     for (i = 0; funcs[i].func; i++)
0091         printk(KERN_DEBUG "Probe %d : %p\n", i, funcs[i].func);
0092 }
0093 
/*
 * Build a new probes array containing the old probes plus @tp_func, inserted
 * by priority (higher @prio earlier; equal priorities keep registration
 * order). Publishes the new array through *funcs and returns the old array
 * so the caller can RCU-free it, or an ERR_PTR on failure.
 * Caller must hold tracepoints_mutex.
 */
static struct tracepoint_func *
func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func,
     int prio)
{
    struct tracepoint_func *old, *new;
    int nr_probes = 0;
    int pos = -1;   /* insertion index; -1 means "append at the end" */

    if (WARN_ON(!tp_func->func))
        return ERR_PTR(-EINVAL);

    debug_print_probes(*funcs);
    old = *funcs;
    if (old) {
        /* (N -> N+1), (N != 0, 1) probes */
        for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
            /* Insert before probes of lower priority */
            if (pos < 0 && old[nr_probes].prio < prio)
                pos = nr_probes;
            /* Same (func, data) pair may only be registered once. */
            if (old[nr_probes].func == tp_func->func &&
                old[nr_probes].data == tp_func->data)
                return ERR_PTR(-EEXIST);
        }
    }
    /* + 2 : one for new probe, one for NULL func */
    new = allocate_probes(nr_probes + 2);
    if (new == NULL)
        return ERR_PTR(-ENOMEM);
    if (old) {
        if (pos < 0) {
            /* No lower-priority probe found: append after all of them. */
            pos = nr_probes;
            memcpy(new, old, nr_probes * sizeof(struct tracepoint_func));
        } else {
            /* Copy higher priority probes ahead of the new probe */
            memcpy(new, old, pos * sizeof(struct tracepoint_func));
            /* Copy the rest after it. */
            memcpy(new + pos + 1, old + pos,
                   (nr_probes - pos) * sizeof(struct tracepoint_func));
        }
    } else
        pos = 0;
    new[pos] = *tp_func;
    /* NULL terminator so __DO_TRACE and the scans above know the end. */
    new[nr_probes + 1].func = NULL;
    *funcs = new;
    debug_print_probes(*funcs);
    return old;
}
0141 
/*
 * Build a new probes array with every (func, data) match of @tp_func removed.
 * A NULL @tp_func->func removes all probes. Publishes the new array (or NULL
 * when nothing remains) through *funcs and returns the old array for the
 * caller to RCU-free, or an ERR_PTR on failure.
 * Caller must hold tracepoints_mutex.
 */
static void *func_remove(struct tracepoint_func **funcs,
        struct tracepoint_func *tp_func)
{
    int nr_probes = 0, nr_del = 0, i;
    struct tracepoint_func *old, *new;

    old = *funcs;

    if (!old)
        return ERR_PTR(-ENOENT);

    debug_print_probes(*funcs);
    /* (N -> M), (N > 1, M >= 0) probes */
    if (tp_func->func) {
        /* Count total probes and how many of them match for deletion. */
        for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
            if (old[nr_probes].func == tp_func->func &&
                 old[nr_probes].data == tp_func->data)
                nr_del++;
        }
    }

    /*
     * If probe is NULL, then nr_probes = nr_del = 0, and then the
     * entire entry will be removed.
     */
    if (nr_probes - nr_del == 0) {
        /* N -> 0, (N > 1) */
        *funcs = NULL;
        debug_print_probes(*funcs);
        return old;
    } else {
        int j = 0;
        /* N -> M, (N > 1, M > 0) */
        /* + 1 for NULL */
        new = allocate_probes(nr_probes - nr_del + 1);
        if (new == NULL)
            return ERR_PTR(-ENOMEM);
        /* Copy over every probe that does not match the one being removed. */
        for (i = 0; old[i].func; i++)
            if (old[i].func != tp_func->func
                    || old[i].data != tp_func->data)
                new[j++] = old[i];
        new[nr_probes - nr_del].func = NULL;
        *funcs = new;
    }
    debug_print_probes(*funcs);
    return old;
}
0189 
/*
 * Add the probe function to a tracepoint.
 *
 * Calls tp->regfunc() on the 0 -> 1 transition (static key still disabled),
 * builds the new probes array, publishes it with rcu_assign_pointer(), then
 * enables the tracepoint's static key. Returns 0 or a negative errno.
 * Caller must hold tracepoints_mutex.
 *
 * NOTE(review): if func_add() fails after tp->regfunc() succeeded, regfunc
 * is not undone here — confirm whether that is intentional for regfunc
 * implementations (e.g. syscall_regfunc refcounting).
 */
static int tracepoint_add_func(struct tracepoint *tp,
                   struct tracepoint_func *func, int prio)
{
    struct tracepoint_func *old, *tp_funcs;
    int ret;

    if (tp->regfunc && !static_key_enabled(&tp->key)) {
        ret = tp->regfunc();
        if (ret < 0)
            return ret;
    }

    /* tracepoints_mutex is held, so a plain protected dereference is safe. */
    tp_funcs = rcu_dereference_protected(tp->funcs,
            lockdep_is_held(&tracepoints_mutex));
    old = func_add(&tp_funcs, func, prio);
    if (IS_ERR(old)) {
        WARN_ON_ONCE(1);
        return PTR_ERR(old);
    }

    /*
     * rcu_assign_pointer has a smp_wmb() which makes sure that the new
     * probe callbacks array is consistent before setting a pointer to it.
     * This array is referenced by __DO_TRACE from
     * include/linux/tracepoints.h. A matching smp_read_barrier_depends()
     * is used.
     */
    rcu_assign_pointer(tp->funcs, tp_funcs);
    /* Enable the static key only on the first probe (0 -> 1 transition). */
    if (!static_key_enabled(&tp->key))
        static_key_slow_inc(&tp->key);
    release_probes(old);
    return 0;
}
0226 
/*
 * Remove a probe function from a tracepoint.
 * Note: only waiting an RCU period after setting elem->call to the empty
 * function insures that the original callback is not used anymore. This insured
 * by preempt_disable around the call site.
 *
 * On the last-probe removal (tp_funcs becomes NULL) the unregfunc is called
 * and the static key is disabled before the NULL array is published.
 * Returns 0 or a negative errno. Caller must hold tracepoints_mutex.
 */
static int tracepoint_remove_func(struct tracepoint *tp,
        struct tracepoint_func *func)
{
    struct tracepoint_func *old, *tp_funcs;

    tp_funcs = rcu_dereference_protected(tp->funcs,
            lockdep_is_held(&tracepoints_mutex));
    old = func_remove(&tp_funcs, func);
    if (IS_ERR(old)) {
        WARN_ON_ONCE(1);
        return PTR_ERR(old);
    }

    if (!tp_funcs) {
        /* Removed last function */
        if (tp->unregfunc && static_key_enabled(&tp->key))
            tp->unregfunc();

        if (static_key_enabled(&tp->key))
            static_key_slow_dec(&tp->key);
    }
    rcu_assign_pointer(tp->funcs, tp_funcs);
    release_probes(old);
    return 0;
}
0258 
/**
 * tracepoint_probe_register_prio -  Connect a probe to a tracepoint with priority
 * @tp: tracepoint
 * @probe: probe handler
 * @data: tracepoint data
 * @prio: priority of this function over other registered functions
 *
 * Returns 0 if ok, error value on error.
 * Note: if @tp is within a module, the caller is responsible for
 * unregistering the probe before the module is gone. This can be
 * performed either with a tracepoint module going notifier, or from
 * within module exit functions.
 */
int tracepoint_probe_register_prio(struct tracepoint *tp, void *probe,
                   void *data, int prio)
{
    struct tracepoint_func tp_func;
    int ret;

    mutex_lock(&tracepoints_mutex);
    tp_func.func = probe;
    tp_func.data = data;
    tp_func.prio = prio;
    ret = tracepoint_add_func(tp, &tp_func, prio);
    mutex_unlock(&tracepoints_mutex);
    return ret;
}
EXPORT_SYMBOL_GPL(tracepoint_probe_register_prio);
0287 
/**
 * tracepoint_probe_register -  Connect a probe to a tracepoint
 * @tp: tracepoint
 * @probe: probe handler
 * @data: tracepoint data
 *
 * Registers @probe at the default priority (TRACEPOINT_DEFAULT_PRIO).
 * Returns 0 if ok, error value on error.
 * Note: if @tp is within a module, the caller is responsible for
 * unregistering the probe before the module is gone. This can be
 * performed either with a tracepoint module going notifier, or from
 * within module exit functions.
 */
int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data)
{
    return tracepoint_probe_register_prio(tp, probe, data, TRACEPOINT_DEFAULT_PRIO);
}
EXPORT_SYMBOL_GPL(tracepoint_probe_register);
0306 
0307 /**
0308  * tracepoint_probe_unregister -  Disconnect a probe from a tracepoint
0309  * @tp: tracepoint
0310  * @probe: probe function pointer
0311  * @data: tracepoint data
0312  *
0313  * Returns 0 if ok, error value on error.
0314  */
0315 int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data)
0316 {
0317     struct tracepoint_func tp_func;
0318     int ret;
0319 
0320     mutex_lock(&tracepoints_mutex);
0321     tp_func.func = probe;
0322     tp_func.data = data;
0323     ret = tracepoint_remove_func(tp, &tp_func);
0324     mutex_unlock(&tracepoints_mutex);
0325     return ret;
0326 }
0327 EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
0328 
0329 #ifdef CONFIG_MODULES
0330 bool trace_module_has_bad_taint(struct module *mod)
0331 {
0332     return mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP) |
0333                    (1 << TAINT_UNSIGNED_MODULE));
0334 }
0335 
/* Notifier chain fired on tracepoint module coming/going events. */
static BLOCKING_NOTIFIER_HEAD(tracepoint_notify_list);
0337 
/**
 * register_tracepoint_module_notifier - register tracepoint coming/going notifier
 * @nb: notifier block
 *
 * Notifiers registered with this function are called on module
 * coming/going with the tracepoint_module_list_mutex held.
 * The notifier block callback should expect a "struct tp_module" data
 * pointer. Already-loaded tracepoint modules are replayed to the new
 * notifier as MODULE_STATE_COMING events.
 */
int register_tracepoint_module_notifier(struct notifier_block *nb)
{
    struct tp_module *tp_mod;
    int ret;

    mutex_lock(&tracepoint_module_list_mutex);
    ret = blocking_notifier_chain_register(&tracepoint_notify_list, nb);
    if (ret)
        goto end;
    /* Catch the new notifier up on every module already in the list. */
    list_for_each_entry(tp_mod, &tracepoint_module_list, list)
        (void) nb->notifier_call(nb, MODULE_STATE_COMING, tp_mod);
end:
    mutex_unlock(&tracepoint_module_list_mutex);
    return ret;
}
EXPORT_SYMBOL_GPL(register_tracepoint_module_notifier);
0363 
/**
 * unregister_tracepoint_module_notifier - unregister tracepoint coming/going notifier
 * @nb: notifier block
 *
 * The notifier block callback should expect a "struct tp_module" data
 * pointer. Modules still in the list are replayed to the departing
 * notifier as MODULE_STATE_GOING events so it can clean up.
 */
int unregister_tracepoint_module_notifier(struct notifier_block *nb)
{
    struct tp_module *tp_mod;
    int ret;

    mutex_lock(&tracepoint_module_list_mutex);
    ret = blocking_notifier_chain_unregister(&tracepoint_notify_list, nb);
    if (ret)
        goto end;
    /* Give the notifier a GOING event for every module still tracked. */
    list_for_each_entry(tp_mod, &tracepoint_module_list, list)
        (void) nb->notifier_call(nb, MODULE_STATE_GOING, tp_mod);
end:
    mutex_unlock(&tracepoint_module_list_mutex);
    return ret;

}
EXPORT_SYMBOL_GPL(unregister_tracepoint_module_notifier);
0388 
0389 /*
0390  * Ensure the tracer unregistered the module's probes before the module
0391  * teardown is performed. Prevents leaks of probe and data pointers.
0392  */
0393 static void tp_module_going_check_quiescent(struct tracepoint * const *begin,
0394         struct tracepoint * const *end)
0395 {
0396     struct tracepoint * const *iter;
0397 
0398     if (!begin)
0399         return;
0400     for (iter = begin; iter < end; iter++)
0401         WARN_ON_ONCE((*iter)->funcs);
0402 }
0403 
0404 static int tracepoint_module_coming(struct module *mod)
0405 {
0406     struct tp_module *tp_mod;
0407     int ret = 0;
0408 
0409     if (!mod->num_tracepoints)
0410         return 0;
0411 
0412     /*
0413      * We skip modules that taint the kernel, especially those with different
0414      * module headers (for forced load), to make sure we don't cause a crash.
0415      * Staging, out-of-tree, and unsigned GPL modules are fine.
0416      */
0417     if (trace_module_has_bad_taint(mod))
0418         return 0;
0419     mutex_lock(&tracepoint_module_list_mutex);
0420     tp_mod = kmalloc(sizeof(struct tp_module), GFP_KERNEL);
0421     if (!tp_mod) {
0422         ret = -ENOMEM;
0423         goto end;
0424     }
0425     tp_mod->mod = mod;
0426     list_add_tail(&tp_mod->list, &tracepoint_module_list);
0427     blocking_notifier_call_chain(&tracepoint_notify_list,
0428             MODULE_STATE_COMING, tp_mod);
0429 end:
0430     mutex_unlock(&tracepoint_module_list_mutex);
0431     return ret;
0432 }
0433 
0434 static void tracepoint_module_going(struct module *mod)
0435 {
0436     struct tp_module *tp_mod;
0437 
0438     if (!mod->num_tracepoints)
0439         return;
0440 
0441     mutex_lock(&tracepoint_module_list_mutex);
0442     list_for_each_entry(tp_mod, &tracepoint_module_list, list) {
0443         if (tp_mod->mod == mod) {
0444             blocking_notifier_call_chain(&tracepoint_notify_list,
0445                     MODULE_STATE_GOING, tp_mod);
0446             list_del(&tp_mod->list);
0447             kfree(tp_mod);
0448             /*
0449              * Called the going notifier before checking for
0450              * quiescence.
0451              */
0452             tp_module_going_check_quiescent(mod->tracepoints_ptrs,
0453                 mod->tracepoints_ptrs + mod->num_tracepoints);
0454             break;
0455         }
0456     }
0457     /*
0458      * In the case of modules that were tainted at "coming", we'll simply
0459      * walk through the list without finding it. We cannot use the "tainted"
0460      * flag on "going", in case a module taints the kernel only after being
0461      * loaded.
0462      */
0463     mutex_unlock(&tracepoint_module_list_mutex);
0464 }
0465 
0466 static int tracepoint_module_notify(struct notifier_block *self,
0467         unsigned long val, void *data)
0468 {
0469     struct module *mod = data;
0470     int ret = 0;
0471 
0472     switch (val) {
0473     case MODULE_STATE_COMING:
0474         ret = tracepoint_module_coming(mod);
0475         break;
0476     case MODULE_STATE_LIVE:
0477         break;
0478     case MODULE_STATE_GOING:
0479         tracepoint_module_going(mod);
0480         break;
0481     case MODULE_STATE_UNFORMED:
0482         break;
0483     }
0484     return ret;
0485 }
0486 
/* Module coming/going notifier hooked into the core module loader. */
static struct notifier_block tracepoint_module_nb = {
    .notifier_call = tracepoint_module_notify,
    .priority = 0,
};
0491 
0492 static __init int init_tracepoints(void)
0493 {
0494     int ret;
0495 
0496     ret = register_module_notifier(&tracepoint_module_nb);
0497     if (ret)
0498         pr_warn("Failed to register tracepoint module enter notifier\n");
0499 
0500     return ret;
0501 }
0502 __initcall(init_tracepoints);
0503 #endif /* CONFIG_MODULES */
0504 
0505 static void for_each_tracepoint_range(struct tracepoint * const *begin,
0506         struct tracepoint * const *end,
0507         void (*fct)(struct tracepoint *tp, void *priv),
0508         void *priv)
0509 {
0510     struct tracepoint * const *iter;
0511 
0512     if (!begin)
0513         return;
0514     for (iter = begin; iter < end; iter++)
0515         fct(*iter, priv);
0516 }
0517 
0518 /**
0519  * for_each_kernel_tracepoint - iteration on all kernel tracepoints
0520  * @fct: callback
0521  * @priv: private data
0522  */
0523 void for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv),
0524         void *priv)
0525 {
0526     for_each_tracepoint_range(__start___tracepoints_ptrs,
0527         __stop___tracepoints_ptrs, fct, priv);
0528 }
0529 EXPORT_SYMBOL_GPL(for_each_kernel_tracepoint);
0530 
0531 #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
0532 
/*
 * NB: reg/unreg are called while guarded with the tracepoints_mutex.
 * Counts how many syscall tracepoints are registered; the TIF flag sweep
 * below happens only on the 0<->1 transitions.
 */
static int sys_tracepoint_refcount;
0535 
0536 int syscall_regfunc(void)
0537 {
0538     struct task_struct *p, *t;
0539 
0540     if (!sys_tracepoint_refcount) {
0541         read_lock(&tasklist_lock);
0542         for_each_process_thread(p, t) {
0543             set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
0544         }
0545         read_unlock(&tasklist_lock);
0546     }
0547     sys_tracepoint_refcount++;
0548 
0549     return 0;
0550 }
0551 
0552 void syscall_unregfunc(void)
0553 {
0554     struct task_struct *p, *t;
0555 
0556     sys_tracepoint_refcount--;
0557     if (!sys_tracepoint_refcount) {
0558         read_lock(&tasklist_lock);
0559         for_each_process_thread(p, t) {
0560             clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
0561         }
0562         read_unlock(&tasklist_lock);
0563     }
0564 }
0565 #endif