0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * ring buffer based function tracer
0004  *
0005  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
0006  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
0007  *
0008  * Originally taken from the RT patch by:
0009  *    Arnaldo Carvalho de Melo <acme@redhat.com>
0010  *
0011  * Based on code from the latency_tracer, that is:
0012  *  Copyright (C) 2004-2006 Ingo Molnar
0013  *  Copyright (C) 2004 Nadia Yvette Chambers
0014  */
0015 #include <linux/ring_buffer.h>
0016 #include <generated/utsrelease.h>
0017 #include <linux/stacktrace.h>
0018 #include <linux/writeback.h>
0019 #include <linux/kallsyms.h>
0020 #include <linux/security.h>
0021 #include <linux/seq_file.h>
0022 #include <linux/notifier.h>
0023 #include <linux/irqflags.h>
0024 #include <linux/debugfs.h>
0025 #include <linux/tracefs.h>
0026 #include <linux/pagemap.h>
0027 #include <linux/hardirq.h>
0028 #include <linux/linkage.h>
0029 #include <linux/uaccess.h>
0030 #include <linux/vmalloc.h>
0031 #include <linux/ftrace.h>
0032 #include <linux/module.h>
0033 #include <linux/percpu.h>
0034 #include <linux/splice.h>
0035 #include <linux/kdebug.h>
0036 #include <linux/string.h>
0037 #include <linux/mount.h>
0038 #include <linux/rwsem.h>
0039 #include <linux/slab.h>
0040 #include <linux/ctype.h>
0041 #include <linux/init.h>
0042 #include <linux/panic_notifier.h>
0043 #include <linux/poll.h>
0044 #include <linux/nmi.h>
0045 #include <linux/fs.h>
0046 #include <linux/trace.h>
0047 #include <linux/sched/clock.h>
0048 #include <linux/sched/rt.h>
0049 #include <linux/fsnotify.h>
0050 #include <linux/irq_work.h>
0051 #include <linux/workqueue.h>
0052 
0053 #include "trace.h"
0054 #include "trace_output.h"
0055 
0056 /*
0057  * On boot up, the ring buffer is set to the minimum size, so that
0058  * we do not waste memory on systems that are not using tracing.
0059  */
0060 bool ring_buffer_expanded;
0061 
0062 /*
0063  * We need to change this state when a selftest is running.
0064  * A selftest will look into the ring buffer to count the
0065  * entries inserted during the selftest, although concurrent
0066  * insertions into the ring buffer, such as trace_printk(),
0067  * could occur at the same time, giving false positive or negative results.
0068  */
0069 static bool __read_mostly tracing_selftest_running;
0070 
0071 /*
0072  * If boot-time tracing including tracers/events via kernel cmdline
0073  * is running, we do not want to run SELFTEST.
0074  */
0075 bool __read_mostly tracing_selftest_disabled;
0076 
0077 #ifdef CONFIG_FTRACE_STARTUP_TEST
0078 void __init disable_tracing_selftest(const char *reason)
0079 {
0080     if (!tracing_selftest_disabled) {
0081         tracing_selftest_disabled = true;
0082         pr_info("Ftrace startup test is disabled due to %s\n", reason);
0083     }
0084 }
0085 #endif
0086 
0087 /* Pipe tracepoints to printk */
0088 struct trace_iterator *tracepoint_print_iter;
0089 int tracepoint_printk;
0090 static bool tracepoint_printk_stop_on_boot __initdata;
0091 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
0092 
0093 /* For tracers that don't implement custom flags */
0094 static struct tracer_opt dummy_tracer_opt[] = {
0095     { }
0096 };
0097 
0098 static int
0099 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
0100 {
0101     return 0;
0102 }
0103 
0104 /*
0105  * To prevent the comm cache from being overwritten when no
0106  * tracing is active, only save the comm when a trace event
0107  * occurred.
0108  */
0109 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
0110 
0111 /*
0112  * Kill all tracing for good (never come back).
0113  * It is initialized to 1 but will turn to zero if the initialization
0114  * of the tracer is successful. But that is the only place that sets
0115  * this back to zero.
0116  */
0117 static int tracing_disabled = 1;
0118 
0119 cpumask_var_t __read_mostly tracing_buffer_mask;
0120 
0121 /*
0122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
0123  *
0124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
0125  * is set, then ftrace_dump is called. This will output the contents
0126  * of the ftrace buffers to the console.  This is very useful for
0127  * capturing traces that lead to crashes and outputting them to a
0128  * serial console.
0129  *
0130  * It is off by default, but you can enable it either by specifying
0131  * "ftrace_dump_on_oops" on the kernel command line, or by setting
0132  * /proc/sys/kernel/ftrace_dump_on_oops.
0133  * Set it to 1 to dump the buffers of all CPUs, or to 2 to dump only
0134  * the buffer of the CPU that triggered the oops.
0135  */
0136 
0137 enum ftrace_dump_mode ftrace_dump_on_oops;
0138 
0139 /* When set, tracing will stop when a WARN*() is hit */
0140 int __disable_trace_on_warning;
0141 
0142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
0143 /* Map of enums to their values, for "eval_map" file */
0144 struct trace_eval_map_head {
0145     struct module           *mod;
0146     unsigned long           length;
0147 };
0148 
0149 union trace_eval_map_item;
0150 
0151 struct trace_eval_map_tail {
0152     /*
0153      * "end" is first and points to NULL as it must be different
0154      * than "mod" or "eval_string"
0155      */
0156     union trace_eval_map_item   *next;
0157     const char          *end;   /* points to NULL */
0158 };
0159 
0160 static DEFINE_MUTEX(trace_eval_mutex);
0161 
0162 /*
0163  * The trace_eval_maps are saved in an array with two extra elements,
0164  * one at the beginning, and one at the end. The beginning item contains
0165  * the count of the saved maps (head.length), and the module they
0166  * belong to if not built in (head.mod). The ending item contains a
0167  * pointer to the next array of saved eval_map items.
0168  */
0169 union trace_eval_map_item {
0170     struct trace_eval_map       map;
0171     struct trace_eval_map_head  head;
0172     struct trace_eval_map_tail  tail;
0173 };
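
/*
 * For instance, the saved array for a module with three eval maps looks
 * roughly like this (a sketch of the layout described above):
 *
 *	[0] head: .mod = the module, .length = 3
 *	[1] map
 *	[2] map
 *	[3] map
 *	[4] tail: .next = next saved array (or NULL), .end = NULL
 */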
0174 
0175 static union trace_eval_map_item *trace_eval_maps;
0176 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
0177 
0178 int tracing_set_tracer(struct trace_array *tr, const char *buf);
0179 static void ftrace_trace_userstack(struct trace_array *tr,
0180                    struct trace_buffer *buffer,
0181                    unsigned int trace_ctx);
0182 
0183 #define MAX_TRACER_SIZE     100
0184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
0185 static char *default_bootup_tracer;
0186 
0187 static bool allocate_snapshot;
0188 static bool snapshot_at_boot;
0189 
0190 static int __init set_cmdline_ftrace(char *str)
0191 {
0192     strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
0193     default_bootup_tracer = bootup_tracer_buf;
0194     /* We are using ftrace early, expand it */
0195     ring_buffer_expanded = true;
0196     return 1;
0197 }
0198 __setup("ftrace=", set_cmdline_ftrace);
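
/*
 * A usage sketch: booting with "ftrace=function_graph" selects the
 * function_graph tracer before user space comes up; the name must match
 * a tracer that has registered itself (for instance "function",
 * "function_graph" or "nop").
 */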
0199 
0200 static int __init set_ftrace_dump_on_oops(char *str)
0201 {
0202     if (*str++ != '=' || !*str || !strcmp("1", str)) {
0203         ftrace_dump_on_oops = DUMP_ALL;
0204         return 1;
0205     }
0206 
0207     if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
0208         ftrace_dump_on_oops = DUMP_ORIG;
0209         return 1;
0210     }
0211 
0212     return 0;
0213 }
0214 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
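
/*
 * Accepted command-line forms, per the parser above (a usage sketch):
 *
 *	ftrace_dump_on_oops		dump the buffers of all CPUs
 *	ftrace_dump_on_oops=1		same as above
 *	ftrace_dump_on_oops=orig_cpu	dump only the CPU that triggered the oops
 *	ftrace_dump_on_oops=2		same as "orig_cpu"
 *
 * The same setting is reachable at run time through
 * /proc/sys/kernel/ftrace_dump_on_oops.
 */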
0215 
0216 static int __init stop_trace_on_warning(char *str)
0217 {
0218     if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
0219         __disable_trace_on_warning = 1;
0220     return 1;
0221 }
0222 __setup("traceoff_on_warning", stop_trace_on_warning);
0223 
0224 static int __init boot_alloc_snapshot(char *str)
0225 {
0226     allocate_snapshot = true;
0227     /* We also need the main ring buffer expanded */
0228     ring_buffer_expanded = true;
0229     return 1;
0230 }
0231 __setup("alloc_snapshot", boot_alloc_snapshot);
0232 
0233 
0234 static int __init boot_snapshot(char *str)
0235 {
0236     snapshot_at_boot = true;
0237     boot_alloc_snapshot(str);
0238     return 1;
0239 }
0240 __setup("ftrace_boot_snapshot", boot_snapshot);
0241 
0242 
0243 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
0244 
0245 static int __init set_trace_boot_options(char *str)
0246 {
0247     strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
0248     return 1;
0249 }
0250 __setup("trace_options=", set_trace_boot_options);
0251 
0252 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
0253 static char *trace_boot_clock __initdata;
0254 
0255 static int __init set_trace_boot_clock(char *str)
0256 {
0257     strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
0258     trace_boot_clock = trace_boot_clock_buf;
0259     return 1;
0260 }
0261 __setup("trace_clock=", set_trace_boot_clock);
0262 
0263 static int __init set_tracepoint_printk(char *str)
0264 {
0265     /* Ignore the "tp_printk_stop_on_boot" param */
0266     if (*str == '_')
0267         return 0;
0268 
0269     if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
0270         tracepoint_printk = 1;
0271     return 1;
0272 }
0273 __setup("tp_printk", set_tracepoint_printk);
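
/*
 * A usage sketch: booting with something like
 *
 *	tp_printk trace_event=sched:sched_switch
 *
 * mirrors the enabled tracepoint events to printk as they fire, while
 * "tp_printk=0" or "tp_printk=off" leaves the mirroring disabled.
 */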
0274 
0275 static int __init set_tracepoint_printk_stop(char *str)
0276 {
0277     tracepoint_printk_stop_on_boot = true;
0278     return 1;
0279 }
0280 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
0281 
0282 unsigned long long ns2usecs(u64 nsec)
0283 {
0284     nsec += 500;
0285     do_div(nsec, 1000);
0286     return nsec;
0287 }
0288 
0289 static void
0290 trace_process_export(struct trace_export *export,
0291            struct ring_buffer_event *event, int flag)
0292 {
0293     struct trace_entry *entry;
0294     unsigned int size = 0;
0295 
0296     if (export->flags & flag) {
0297         entry = ring_buffer_event_data(event);
0298         size = ring_buffer_event_length(event);
0299         export->write(export, entry, size);
0300     }
0301 }
0302 
0303 static DEFINE_MUTEX(ftrace_export_lock);
0304 
0305 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
0306 
0307 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
0308 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
0309 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
0310 
0311 static inline void ftrace_exports_enable(struct trace_export *export)
0312 {
0313     if (export->flags & TRACE_EXPORT_FUNCTION)
0314         static_branch_inc(&trace_function_exports_enabled);
0315 
0316     if (export->flags & TRACE_EXPORT_EVENT)
0317         static_branch_inc(&trace_event_exports_enabled);
0318 
0319     if (export->flags & TRACE_EXPORT_MARKER)
0320         static_branch_inc(&trace_marker_exports_enabled);
0321 }
0322 
0323 static inline void ftrace_exports_disable(struct trace_export *export)
0324 {
0325     if (export->flags & TRACE_EXPORT_FUNCTION)
0326         static_branch_dec(&trace_function_exports_enabled);
0327 
0328     if (export->flags & TRACE_EXPORT_EVENT)
0329         static_branch_dec(&trace_event_exports_enabled);
0330 
0331     if (export->flags & TRACE_EXPORT_MARKER)
0332         static_branch_dec(&trace_marker_exports_enabled);
0333 }
0334 
0335 static void ftrace_exports(struct ring_buffer_event *event, int flag)
0336 {
0337     struct trace_export *export;
0338 
0339     preempt_disable_notrace();
0340 
0341     export = rcu_dereference_raw_check(ftrace_exports_list);
0342     while (export) {
0343         trace_process_export(export, event, flag);
0344         export = rcu_dereference_raw_check(export->next);
0345     }
0346 
0347     preempt_enable_notrace();
0348 }
0349 
0350 static inline void
0351 add_trace_export(struct trace_export **list, struct trace_export *export)
0352 {
0353     rcu_assign_pointer(export->next, *list);
0354     /*
0355      * We are adding export to the list, but another
0356      * CPU might be walking that list. We need to make sure
0357      * the export->next pointer is valid before another CPU sees
0358      * the export pointer included in the list.
0359      */
0360     rcu_assign_pointer(*list, export);
0361 }
0362 
0363 static inline int
0364 rm_trace_export(struct trace_export **list, struct trace_export *export)
0365 {
0366     struct trace_export **p;
0367 
0368     for (p = list; *p != NULL; p = &(*p)->next)
0369         if (*p == export)
0370             break;
0371 
0372     if (*p != export)
0373         return -1;
0374 
0375     rcu_assign_pointer(*p, (*p)->next);
0376 
0377     return 0;
0378 }
0379 
0380 static inline void
0381 add_ftrace_export(struct trace_export **list, struct trace_export *export)
0382 {
0383     ftrace_exports_enable(export);
0384 
0385     add_trace_export(list, export);
0386 }
0387 
0388 static inline int
0389 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
0390 {
0391     int ret;
0392 
0393     ret = rm_trace_export(list, export);
0394     ftrace_exports_disable(export);
0395 
0396     return ret;
0397 }
0398 
0399 int register_ftrace_export(struct trace_export *export)
0400 {
0401     if (WARN_ON_ONCE(!export->write))
0402         return -1;
0403 
0404     mutex_lock(&ftrace_export_lock);
0405 
0406     add_ftrace_export(&ftrace_exports_list, export);
0407 
0408     mutex_unlock(&ftrace_export_lock);
0409 
0410     return 0;
0411 }
0412 EXPORT_SYMBOL_GPL(register_ftrace_export);
0413 
0414 int unregister_ftrace_export(struct trace_export *export)
0415 {
0416     int ret;
0417 
0418     mutex_lock(&ftrace_export_lock);
0419 
0420     ret = rm_ftrace_export(&ftrace_exports_list, export);
0421 
0422     mutex_unlock(&ftrace_export_lock);
0423 
0424     return ret;
0425 }
0426 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
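
/*
 * A minimal exporter sketch (my_export_write and my_export are
 * hypothetical; the callback signature is assumed from
 * trace_process_export() above, which hands over the raw entry and its
 * length):
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int len)
 *	{
 *		... push the raw trace entry to some out-of-band channel ...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION | TRACE_EXPORT_MARKER,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */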
0427 
0428 /* trace_flags holds trace_options default values */
0429 #define TRACE_DEFAULT_FLAGS                     \
0430     (FUNCTION_DEFAULT_FLAGS |                   \
0431      TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |          \
0432      TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |        \
0433      TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |         \
0434      TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |         \
0435      TRACE_ITER_HASH_PTR)
0436 
0437 /* trace_options that are only supported by global_trace */
0438 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |          \
0439            TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
0440 
0441 /* trace_flags that are default zero for instances */
0442 #define ZEROED_TRACE_FLAGS \
0443     (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
0444 
0445 /*
0446  * The global_trace is the descriptor that holds the top-level tracing
0447  * buffers for the live tracing.
0448  */
0449 static struct trace_array global_trace = {
0450     .trace_flags = TRACE_DEFAULT_FLAGS,
0451 };
0452 
0453 LIST_HEAD(ftrace_trace_arrays);
0454 
0455 int trace_array_get(struct trace_array *this_tr)
0456 {
0457     struct trace_array *tr;
0458     int ret = -ENODEV;
0459 
0460     mutex_lock(&trace_types_lock);
0461     list_for_each_entry(tr, &ftrace_trace_arrays, list) {
0462         if (tr == this_tr) {
0463             tr->ref++;
0464             ret = 0;
0465             break;
0466         }
0467     }
0468     mutex_unlock(&trace_types_lock);
0469 
0470     return ret;
0471 }
0472 
0473 static void __trace_array_put(struct trace_array *this_tr)
0474 {
0475     WARN_ON(!this_tr->ref);
0476     this_tr->ref--;
0477 }
0478 
0479 /**
0480  * trace_array_put - Decrement the reference counter for this trace array.
0481  * @this_tr : pointer to the trace array
0482  *
0483  * NOTE: Use this when we no longer need the trace array returned by
0484  * trace_array_get_by_name(). This ensures the trace array can be later
0485  * destroyed.
0486  *
0487  */
0488 void trace_array_put(struct trace_array *this_tr)
0489 {
0490     if (!this_tr)
0491         return;
0492 
0493     mutex_lock(&trace_types_lock);
0494     __trace_array_put(this_tr);
0495     mutex_unlock(&trace_types_lock);
0496 }
0497 EXPORT_SYMBOL_GPL(trace_array_put);
0498 
0499 int tracing_check_open_get_tr(struct trace_array *tr)
0500 {
0501     int ret;
0502 
0503     ret = security_locked_down(LOCKDOWN_TRACEFS);
0504     if (ret)
0505         return ret;
0506 
0507     if (tracing_disabled)
0508         return -ENODEV;
0509 
0510     if (tr && trace_array_get(tr) < 0)
0511         return -ENODEV;
0512 
0513     return 0;
0514 }
0515 
0516 int call_filter_check_discard(struct trace_event_call *call, void *rec,
0517                   struct trace_buffer *buffer,
0518                   struct ring_buffer_event *event)
0519 {
0520     if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
0521         !filter_match_preds(call->filter, rec)) {
0522         __trace_event_discard_commit(buffer, event);
0523         return 1;
0524     }
0525 
0526     return 0;
0527 }
0528 
0529 /**
0530  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
0531  * @filtered_pids: The list of pids to check
0532  * @search_pid: The PID to find in @filtered_pids
0533  *
0534  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
0535  */
0536 bool
0537 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
0538 {
0539     return trace_pid_list_is_set(filtered_pids, search_pid);
0540 }
0541 
0542 /**
0543  * trace_ignore_this_task - should a task be ignored for tracing
0544  * @filtered_pids: The list of pids to check
0545  * @filtered_no_pids: The list of pids not to be traced
0546  * @task: The task that should be ignored if not filtered
0547  *
0548  * Checks if @task should be traced or not from @filtered_pids.
0549  * Returns true if @task should *NOT* be traced.
0550  * Returns false if @task should be traced.
0551  */
0552 bool
0553 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
0554                struct trace_pid_list *filtered_no_pids,
0555                struct task_struct *task)
0556 {
0557     /*
0558      * If filtered_no_pids is not empty, and the task's pid is listed
0559      * in filtered_no_pids, then return true.
0560      * Otherwise, if filtered_pids is empty, that means we can
0561      * trace all tasks. If it has content, then only trace pids
0562      * within filtered_pids.
0563      */
0564 
0565     return (filtered_pids &&
0566         !trace_find_filtered_pid(filtered_pids, task->pid)) ||
0567         (filtered_no_pids &&
0568          trace_find_filtered_pid(filtered_no_pids, task->pid));
0569 }
0570 
0571 /**
0572  * trace_filter_add_remove_task - Add or remove a task from a pid_list
0573  * @pid_list: The list to modify
0574  * @self: The current task for fork or NULL for exit
0575  * @task: The task to add or remove
0576  *
0577  * When adding a task, if @self is defined, the task is only added if @self
0578  * is also included in @pid_list. This happens on fork, where tasks should
0579  * only be added when the parent is listed. If @self is NULL, then the
0580  * @task pid will be removed from the list, which happens on exit
0581  * of a task.
0582  */
0583 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
0584                   struct task_struct *self,
0585                   struct task_struct *task)
0586 {
0587     if (!pid_list)
0588         return;
0589 
0590     /* For forks, we only add if the forking task is listed */
0591     if (self) {
0592         if (!trace_find_filtered_pid(pid_list, self->pid))
0593             return;
0594     }
0595 
0596     /* "self" is set for forks, and NULL for exits */
0597     if (self)
0598         trace_pid_list_set(pid_list, task->pid);
0599     else
0600         trace_pid_list_clear(pid_list, task->pid);
0601 }
0602 
0603 /**
0604  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
0605  * @pid_list: The pid list to show
0606  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
0607  * @pos: The position of the file
0608  *
0609  * This is used by the seq_file "next" operation to iterate the pids
0610  * listed in a trace_pid_list structure.
0611  *
0612  * Returns the pid+1 as we want to display pid of zero, but NULL would
0613  * stop the iteration.
0614  */
0615 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
0616 {
0617     long pid = (unsigned long)v;
0618     unsigned int next;
0619 
0620     (*pos)++;
0621 
0622     /* pid already is +1 of the actual previous bit */
0623     if (trace_pid_list_next(pid_list, pid, &next) < 0)
0624         return NULL;
0625 
0626     pid = next;
0627 
0628     /* Return pid + 1 to allow zero to be represented */
0629     return (void *)(pid + 1);
0630 }
0631 
0632 /**
0633  * trace_pid_start - Used for seq_file to start reading pid lists
0634  * @pid_list: The pid list to show
0635  * @pos: The position of the file
0636  *
0637  * This is used by seq_file "start" operation to start the iteration
0638  * of listing pids.
0639  *
0640  * Returns the pid+1 as we want to display pid of zero, but NULL would
0641  * stop the iteration.
0642  */
0643 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
0644 {
0645     unsigned long pid;
0646     unsigned int first;
0647     loff_t l = 0;
0648 
0649     if (trace_pid_list_first(pid_list, &first) < 0)
0650         return NULL;
0651 
0652     pid = first;
0653 
0654     /* Return pid + 1 so that zero can be the exit value */
0655     for (pid++; pid && l < *pos;
0656          pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
0657         ;
0658     return (void *)pid;
0659 }
0660 
0661 /**
0662  * trace_pid_show - show the current pid in seq_file processing
0663  * @m: The seq_file structure to write into
0664  * @v: A void pointer of the pid (+1) value to display
0665  *
0666  * Can be directly used by seq_file operations to display the current
0667  * pid value.
0668  */
0669 int trace_pid_show(struct seq_file *m, void *v)
0670 {
0671     unsigned long pid = (unsigned long)v - 1;
0672 
0673     seq_printf(m, "%lu\n", pid);
0674     return 0;
0675 }
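
/*
 * These helpers are meant to back a seq_file interface; a sketch with
 * hypothetical wrappers (my_pids_start/next/stop, which resolve the
 * pid_list under the appropriate locking) could look like:
 *
 *	static const struct seq_operations my_pids_seq_ops = {
 *		.start	= my_pids_start,
 *		.next	= my_pids_next,
 *		.stop	= my_pids_stop,
 *		.show	= trace_pid_show,
 *	};
 *
 * with my_pids_start() calling trace_pid_start() and my_pids_next()
 * calling trace_pid_next() on the resolved list.
 */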
0676 
0677 /* 128 should be much more than enough */
0678 #define PID_BUF_SIZE        127
0679 
0680 int trace_pid_write(struct trace_pid_list *filtered_pids,
0681             struct trace_pid_list **new_pid_list,
0682             const char __user *ubuf, size_t cnt)
0683 {
0684     struct trace_pid_list *pid_list;
0685     struct trace_parser parser;
0686     unsigned long val;
0687     int nr_pids = 0;
0688     ssize_t read = 0;
0689     ssize_t ret;
0690     loff_t pos;
0691     pid_t pid;
0692 
0693     if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
0694         return -ENOMEM;
0695 
0696     /*
0697      * Always create a new pid list. The write is an all-or-nothing
0698      * operation: a new list is built whenever the user adds new
0699      * pids, and if the operation fails, the current list is
0700      * not modified.
0701      */
0702     pid_list = trace_pid_list_alloc();
0703     if (!pid_list) {
0704         trace_parser_put(&parser);
0705         return -ENOMEM;
0706     }
0707 
0708     if (filtered_pids) {
0709         /* copy the current bits to the new max */
0710         ret = trace_pid_list_first(filtered_pids, &pid);
0711         while (!ret) {
0712             trace_pid_list_set(pid_list, pid);
0713             ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
0714             nr_pids++;
0715         }
0716     }
0717 
0718     ret = 0;
0719     while (cnt > 0) {
0720 
0721         pos = 0;
0722 
0723         ret = trace_get_user(&parser, ubuf, cnt, &pos);
0724         if (ret < 0)
0725             break;
0726 
0727         read += ret;
0728         ubuf += ret;
0729         cnt -= ret;
0730 
0731         if (!trace_parser_loaded(&parser))
0732             break;
0733 
0734         ret = -EINVAL;
0735         if (kstrtoul(parser.buffer, 0, &val))
0736             break;
0737 
0738         pid = (pid_t)val;
0739 
0740         if (trace_pid_list_set(pid_list, pid) < 0) {
0741             ret = -1;
0742             break;
0743         }
0744         nr_pids++;
0745 
0746         trace_parser_clear(&parser);
0747         ret = 0;
0748     }
0749     trace_parser_put(&parser);
0750 
0751     if (ret < 0) {
0752         trace_pid_list_free(pid_list);
0753         return ret;
0754     }
0755 
0756     if (!nr_pids) {
0757         /* Cleared the list of pids */
0758         trace_pid_list_free(pid_list);
0759         pid_list = NULL;
0760     }
0761 
0762     *new_pid_list = pid_list;
0763 
0764     return read;
0765 }
0766 
0767 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
0768 {
0769     u64 ts;
0770 
0771     /* Early boot up does not have a buffer yet */
0772     if (!buf->buffer)
0773         return trace_clock_local();
0774 
0775     ts = ring_buffer_time_stamp(buf->buffer);
0776     ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
0777 
0778     return ts;
0779 }
0780 
0781 u64 ftrace_now(int cpu)
0782 {
0783     return buffer_ftrace_now(&global_trace.array_buffer, cpu);
0784 }
0785 
0786 /**
0787  * tracing_is_enabled - Show if global_trace has been enabled
0788  *
0789  * Shows if the global trace has been enabled or not. It uses the
0790  * mirror flag "buffer_disabled", which is meant for fast paths such as
0791  * the irqsoff tracer, but it may be inaccurate due to races. If you
0792  * need to know the accurate state, use tracing_is_on(), which is a little
0793  * slower but accurate.
0794  */
0795 int tracing_is_enabled(void)
0796 {
0797     /*
0798      * For quick access (irqsoff uses this in fast path), just
0799      * return the mirror variable of the state of the ring buffer.
0800      * It's a little racy, but we don't really care.
0801      */
0802     smp_rmb();
0803     return !global_trace.buffer_disabled;
0804 }
0805 
0806 /*
0807  * trace_buf_size is the size in bytes that is allocated
0808  * for a buffer. Note, the number of bytes is always rounded
0809  * to page size.
0810  *
0811  * This number is purposely set to a low number of 16384.
0812  * If a dump on oops happens, it is much appreciated not to have
0813  * to wait for all that output. In any case, this is configurable
0814  * at both boot time and run time.
0815  */
0816 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
0817 
0818 static unsigned long        trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
0819 
0820 /* trace_types holds a link list of available tracers. */
0821 static struct tracer        *trace_types __read_mostly;
0822 
0823 /*
0824  * trace_types_lock is used to protect the trace_types list.
0825  */
0826 DEFINE_MUTEX(trace_types_lock);
0827 
0828 /*
0829  * serialize the access of the ring buffer
0830  *
0831  * ring buffer serializes readers, but it is low level protection.
0832  * The validity of the events (which returns by ring_buffer_peek() ..etc)
0833  * are not protected by ring buffer.
0834  *
0835  * The content of events may become garbage if we allow other process consumes
0836  * these events concurrently:
0837  *   A) the page of the consumed events may become a normal page
0838  *      (not reader page) in ring buffer, and this page will be rewritten
0839  *      by events producer.
0840  *   B) The page of the consumed events may become a page for splice_read,
0841  *      and this page will be returned to system.
0842  *
0843  * These primitives allow multi process access to different cpu ring buffer
0844  * concurrently.
0845  *
0846  * These primitives don't distinguish read-only and read-consume access.
0847  * Multi read-only access are also serialized.
0848  */
0849 
0850 #ifdef CONFIG_SMP
0851 static DECLARE_RWSEM(all_cpu_access_lock);
0852 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
0853 
0854 static inline void trace_access_lock(int cpu)
0855 {
0856     if (cpu == RING_BUFFER_ALL_CPUS) {
0857         /* gain it for accessing the whole ring buffer. */
0858         down_write(&all_cpu_access_lock);
0859     } else {
0860         /* gain it for accessing a cpu ring buffer. */
0861 
0862         /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
0863         down_read(&all_cpu_access_lock);
0864 
0865         /* Secondly block other access to this @cpu ring buffer. */
0866         mutex_lock(&per_cpu(cpu_access_lock, cpu));
0867     }
0868 }
0869 
0870 static inline void trace_access_unlock(int cpu)
0871 {
0872     if (cpu == RING_BUFFER_ALL_CPUS) {
0873         up_write(&all_cpu_access_lock);
0874     } else {
0875         mutex_unlock(&per_cpu(cpu_access_lock, cpu));
0876         up_read(&all_cpu_access_lock);
0877     }
0878 }
0879 
0880 static inline void trace_access_lock_init(void)
0881 {
0882     int cpu;
0883 
0884     for_each_possible_cpu(cpu)
0885         mutex_init(&per_cpu(cpu_access_lock, cpu));
0886 }
0887 
0888 #else
0889 
0890 static DEFINE_MUTEX(access_lock);
0891 
0892 static inline void trace_access_lock(int cpu)
0893 {
0894     (void)cpu;
0895     mutex_lock(&access_lock);
0896 }
0897 
0898 static inline void trace_access_unlock(int cpu)
0899 {
0900     (void)cpu;
0901     mutex_unlock(&access_lock);
0902 }
0903 
0904 static inline void trace_access_lock_init(void)
0905 {
0906 }
0907 
0908 #endif
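
/*
 * A reader of a single CPU buffer brackets its consuming access roughly
 * like this (sketch):
 *
 *	trace_access_lock(cpu);
 *	... consume events from that CPU's ring buffer ...
 *	trace_access_unlock(cpu);
 *
 * while an operation that touches every CPU passes RING_BUFFER_ALL_CPUS
 * to take the access lock exclusively.
 */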
0909 
0910 #ifdef CONFIG_STACKTRACE
0911 static void __ftrace_trace_stack(struct trace_buffer *buffer,
0912                  unsigned int trace_ctx,
0913                  int skip, struct pt_regs *regs);
0914 static inline void ftrace_trace_stack(struct trace_array *tr,
0915                       struct trace_buffer *buffer,
0916                       unsigned int trace_ctx,
0917                       int skip, struct pt_regs *regs);
0918 
0919 #else
0920 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
0921                     unsigned int trace_ctx,
0922                     int skip, struct pt_regs *regs)
0923 {
0924 }
0925 static inline void ftrace_trace_stack(struct trace_array *tr,
0926                       struct trace_buffer *buffer,
0927                       unsigned long trace_ctx,
0928                       int skip, struct pt_regs *regs)
0929 {
0930 }
0931 
0932 #endif
0933 
0934 static __always_inline void
0935 trace_event_setup(struct ring_buffer_event *event,
0936           int type, unsigned int trace_ctx)
0937 {
0938     struct trace_entry *ent = ring_buffer_event_data(event);
0939 
0940     tracing_generic_entry_update(ent, type, trace_ctx);
0941 }
0942 
0943 static __always_inline struct ring_buffer_event *
0944 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
0945               int type,
0946               unsigned long len,
0947               unsigned int trace_ctx)
0948 {
0949     struct ring_buffer_event *event;
0950 
0951     event = ring_buffer_lock_reserve(buffer, len);
0952     if (event != NULL)
0953         trace_event_setup(event, type, trace_ctx);
0954 
0955     return event;
0956 }
0957 
0958 void tracer_tracing_on(struct trace_array *tr)
0959 {
0960     if (tr->array_buffer.buffer)
0961         ring_buffer_record_on(tr->array_buffer.buffer);
0962     /*
0963      * This flag is looked at when buffers haven't been allocated
0964      * yet, or by some tracers (like irqsoff), that just want to
0965      * know if the ring buffer has been disabled, but it can handle
0966      * races of where it gets disabled but we still do a record.
0967      * As the check is in the fast path of the tracers, it is more
0968      * important to be fast than accurate.
0969      */
0970     tr->buffer_disabled = 0;
0971     /* Make the flag seen by readers */
0972     smp_wmb();
0973 }
0974 
0975 /**
0976  * tracing_on - enable tracing buffers
0977  *
0978  * This function enables tracing buffers that may have been
0979  * disabled with tracing_off.
0980  */
0981 void tracing_on(void)
0982 {
0983     tracer_tracing_on(&global_trace);
0984 }
0985 EXPORT_SYMBOL_GPL(tracing_on);
0986 
0987 
0988 static __always_inline void
0989 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
0990 {
0991     __this_cpu_write(trace_taskinfo_save, true);
0992 
0993     /* If this is the temp buffer, we need to commit fully */
0994     if (this_cpu_read(trace_buffered_event) == event) {
0995         /* Length is in event->array[0] */
0996         ring_buffer_write(buffer, event->array[0], &event->array[1]);
0997         /* Release the temp buffer */
0998         this_cpu_dec(trace_buffered_event_cnt);
0999         /* ring_buffer_unlock_commit() enables preemption */
1000         preempt_enable_notrace();
1001     } else
1002         ring_buffer_unlock_commit(buffer, event);
1003 }
1004 
1005 /**
1006  * __trace_puts - write a constant string into the trace buffer.
1007  * @ip:    The address of the caller
1008  * @str:   The constant string to write
1009  * @size:  The size of the string.
1010  */
1011 int __trace_puts(unsigned long ip, const char *str, int size)
1012 {
1013     struct ring_buffer_event *event;
1014     struct trace_buffer *buffer;
1015     struct print_entry *entry;
1016     unsigned int trace_ctx;
1017     int alloc;
1018 
1019     if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1020         return 0;
1021 
1022     if (unlikely(tracing_selftest_running || tracing_disabled))
1023         return 0;
1024 
1025     alloc = sizeof(*entry) + size + 2; /* possible \n added */
1026 
1027     trace_ctx = tracing_gen_ctx();
1028     buffer = global_trace.array_buffer.buffer;
1029     ring_buffer_nest_start(buffer);
1030     event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1031                         trace_ctx);
1032     if (!event) {
1033         size = 0;
1034         goto out;
1035     }
1036 
1037     entry = ring_buffer_event_data(event);
1038     entry->ip = ip;
1039 
1040     memcpy(&entry->buf, str, size);
1041 
1042     /* Add a newline if necessary */
1043     if (entry->buf[size - 1] != '\n') {
1044         entry->buf[size] = '\n';
1045         entry->buf[size + 1] = '\0';
1046     } else
1047         entry->buf[size] = '\0';
1048 
1049     __buffer_unlock_commit(buffer, event);
1050     ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1051  out:
1052     ring_buffer_nest_end(buffer);
1053     return size;
1054 }
1055 EXPORT_SYMBOL_GPL(__trace_puts);
1056 
1057 /**
1058  * __trace_bputs - write the pointer to a constant string into trace buffer
1059  * @ip:    The address of the caller
1060  * @str:   The constant string to write to the buffer to
1061  */
1062 int __trace_bputs(unsigned long ip, const char *str)
1063 {
1064     struct ring_buffer_event *event;
1065     struct trace_buffer *buffer;
1066     struct bputs_entry *entry;
1067     unsigned int trace_ctx;
1068     int size = sizeof(struct bputs_entry);
1069     int ret = 0;
1070 
1071     if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1072         return 0;
1073 
1074     if (unlikely(tracing_selftest_running || tracing_disabled))
1075         return 0;
1076 
1077     trace_ctx = tracing_gen_ctx();
1078     buffer = global_trace.array_buffer.buffer;
1079 
1080     ring_buffer_nest_start(buffer);
1081     event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1082                         trace_ctx);
1083     if (!event)
1084         goto out;
1085 
1086     entry = ring_buffer_event_data(event);
1087     entry->ip           = ip;
1088     entry->str          = str;
1089 
1090     __buffer_unlock_commit(buffer, event);
1091     ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1092 
1093     ret = 1;
1094  out:
1095     ring_buffer_nest_end(buffer);
1096     return ret;
1097 }
1098 EXPORT_SYMBOL_GPL(__trace_bputs);
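
/*
 * Callers normally reach __trace_puts()/__trace_bputs() through the
 * trace_puts() macro rather than directly, e.g. (sketch):
 *
 *	trace_puts("reached the slow path\n");
 *
 * which fills in the caller's instruction pointer and picks the bputs
 * variant for strings known to be constant.
 */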
1099 
1100 #ifdef CONFIG_TRACER_SNAPSHOT
1101 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1102                        void *cond_data)
1103 {
1104     struct tracer *tracer = tr->current_trace;
1105     unsigned long flags;
1106 
1107     if (in_nmi()) {
1108         internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1109         internal_trace_puts("*** snapshot is being ignored        ***\n");
1110         return;
1111     }
1112 
1113     if (!tr->allocated_snapshot) {
1114         internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1115         internal_trace_puts("*** stopping trace here!   ***\n");
1116         tracing_off();
1117         return;
1118     }
1119 
1120     /* Note, snapshot can not be used when the tracer uses it */
1121     if (tracer->use_max_tr) {
1122         internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1123         internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1124         return;
1125     }
1126 
1127     local_irq_save(flags);
1128     update_max_tr(tr, current, smp_processor_id(), cond_data);
1129     local_irq_restore(flags);
1130 }
1131 
1132 void tracing_snapshot_instance(struct trace_array *tr)
1133 {
1134     tracing_snapshot_instance_cond(tr, NULL);
1135 }
1136 
1137 /**
1138  * tracing_snapshot - take a snapshot of the current buffer.
1139  *
1140  * This causes a swap between the snapshot buffer and the current live
1141  * tracing buffer. You can use this to take snapshots of the live
1142  * trace when some condition is triggered, but continue to trace.
1143  *
1144  * Note, make sure to allocate the snapshot with either
1145  * a tracing_snapshot_alloc(), or by doing it manually
1146  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1147  *
0148  * If the snapshot buffer is not allocated, this will stop tracing,
0149  * basically making a permanent snapshot.
1150  */
1151 void tracing_snapshot(void)
1152 {
1153     struct trace_array *tr = &global_trace;
1154 
1155     tracing_snapshot_instance(tr);
1156 }
1157 EXPORT_SYMBOL_GPL(tracing_snapshot);
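
/*
 * Typical usage (a sketch; condition_hit() is a hypothetical predicate):
 * allocate the spare buffer once from a context that can sleep, then
 * trigger snapshots wherever the interesting condition is detected:
 *
 *	tracing_alloc_snapshot();
 *	...
 *	if (condition_hit())
 *		tracing_snapshot();
 */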
1158 
1159 /**
1160  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1161  * @tr:     The tracing instance to snapshot
1162  * @cond_data:  The data to be tested conditionally, and possibly saved
1163  *
1164  * This is the same as tracing_snapshot() except that the snapshot is
1165  * conditional - the snapshot will only happen if the
1166  * cond_snapshot.update() implementation receiving the cond_data
1167  * returns true, which means that the trace array's cond_snapshot
1168  * update() operation used the cond_data to determine whether the
1169  * snapshot should be taken, and if it was, presumably saved it along
1170  * with the snapshot.
1171  */
1172 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1173 {
1174     tracing_snapshot_instance_cond(tr, cond_data);
1175 }
1176 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1177 
1178 /**
1179  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1180  * @tr:     The tracing instance
1181  *
1182  * When the user enables a conditional snapshot using
1183  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1184  * with the snapshot.  This accessor is used to retrieve it.
1185  *
1186  * Should not be called from cond_snapshot.update(), since it takes
1187  * the tr->max_lock lock, which the code calling
1188  * cond_snapshot.update() has already done.
1189  *
1190  * Returns the cond_data associated with the trace array's snapshot.
1191  */
1192 void *tracing_cond_snapshot_data(struct trace_array *tr)
1193 {
1194     void *cond_data = NULL;
1195 
1196     arch_spin_lock(&tr->max_lock);
1197 
1198     if (tr->cond_snapshot)
1199         cond_data = tr->cond_snapshot->cond_data;
1200 
1201     arch_spin_unlock(&tr->max_lock);
1202 
1203     return cond_data;
1204 }
1205 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1206 
1207 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1208                     struct array_buffer *size_buf, int cpu_id);
1209 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1210 
1211 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1212 {
1213     int ret;
1214 
1215     if (!tr->allocated_snapshot) {
1216 
1217         /* allocate spare buffer */
1218         ret = resize_buffer_duplicate_size(&tr->max_buffer,
1219                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1220         if (ret < 0)
1221             return ret;
1222 
1223         tr->allocated_snapshot = true;
1224     }
1225 
1226     return 0;
1227 }
1228 
1229 static void free_snapshot(struct trace_array *tr)
1230 {
1231     /*
1232      * We don't free the ring buffer. instead, resize it because
1233      * The max_tr ring buffer has some state (e.g. ring->clock) and
1234      * we want preserve it.
1235      */
1236     ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1237     set_buffer_entries(&tr->max_buffer, 1);
1238     tracing_reset_online_cpus(&tr->max_buffer);
1239     tr->allocated_snapshot = false;
1240 }
1241 
1242 /**
1243  * tracing_alloc_snapshot - allocate snapshot buffer.
1244  *
1245  * This only allocates the snapshot buffer if it isn't already
1246  * allocated - it doesn't also take a snapshot.
1247  *
1248  * This is meant to be used in cases where the snapshot buffer needs
1249  * to be set up for events that can't sleep but need to be able to
1250  * trigger a snapshot.
1251  */
1252 int tracing_alloc_snapshot(void)
1253 {
1254     struct trace_array *tr = &global_trace;
1255     int ret;
1256 
1257     ret = tracing_alloc_snapshot_instance(tr);
1258     WARN_ON(ret < 0);
1259 
1260     return ret;
1261 }
1262 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1263 
1264 /**
1265  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1266  *
1267  * This is similar to tracing_snapshot(), but it will allocate the
1268  * snapshot buffer if it isn't already allocated. Use this only
1269  * where it is safe to sleep, as the allocation may sleep.
1270  *
1271  * This causes a swap between the snapshot buffer and the current live
1272  * tracing buffer. You can use this to take snapshots of the live
1273  * trace when some condition is triggered, but continue to trace.
1274  */
1275 void tracing_snapshot_alloc(void)
1276 {
1277     int ret;
1278 
1279     ret = tracing_alloc_snapshot();
1280     if (ret < 0)
1281         return;
1282 
1283     tracing_snapshot();
1284 }
1285 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1286 
1287 /**
1288  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1289  * @tr:     The tracing instance
1290  * @cond_data:  User data to associate with the snapshot
1291  * @update: Implementation of the cond_snapshot update function
1292  *
1293  * Check whether the conditional snapshot for the given instance has
1294  * already been enabled, or if the current tracer is already using a
1295  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1296  * save the cond_data and update function inside.
1297  *
1298  * Returns 0 if successful, error otherwise.
1299  */
1300 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1301                  cond_update_fn_t update)
1302 {
1303     struct cond_snapshot *cond_snapshot;
1304     int ret = 0;
1305 
1306     cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1307     if (!cond_snapshot)
1308         return -ENOMEM;
1309 
1310     cond_snapshot->cond_data = cond_data;
1311     cond_snapshot->update = update;
1312 
1313     mutex_lock(&trace_types_lock);
1314 
1315     ret = tracing_alloc_snapshot_instance(tr);
1316     if (ret)
1317         goto fail_unlock;
1318 
1319     if (tr->current_trace->use_max_tr) {
1320         ret = -EBUSY;
1321         goto fail_unlock;
1322     }
1323 
1324     /*
1325      * The cond_snapshot can only change to NULL without the
1326      * trace_types_lock. We don't care if we race with it going
1327      * to NULL, but we want to make sure that it's not set to
1328      * something other than NULL when we get here, which we can
1329      * do safely with only holding the trace_types_lock and not
1330      * having to take the max_lock.
1331      */
1332     if (tr->cond_snapshot) {
1333         ret = -EBUSY;
1334         goto fail_unlock;
1335     }
1336 
1337     arch_spin_lock(&tr->max_lock);
1338     tr->cond_snapshot = cond_snapshot;
1339     arch_spin_unlock(&tr->max_lock);
1340 
1341     mutex_unlock(&trace_types_lock);
1342 
1343     return ret;
1344 
1345  fail_unlock:
1346     mutex_unlock(&trace_types_lock);
1347     kfree(cond_snapshot);
1348     return ret;
1349 }
1350 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
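
/*
 * Sketch of a conditional-snapshot user (struct my_data, data and
 * my_update() are hypothetical; the update callback is assumed to take
 * the trace array and the cond_data and to return true when the snapshot
 * should actually be taken, matching its use in update_max_tr()):
 *
 *	static struct my_data data;
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_data *d = cond_data;
 *
 *		return d->value > d->threshold;
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &data);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */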
1351 
1352 /**
1353  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1354  * @tr:     The tracing instance
1355  *
1356  * Check whether the conditional snapshot for the given instance is
1357  * enabled; if so, free the cond_snapshot associated with it,
1358  * otherwise return -EINVAL.
1359  *
1360  * Returns 0 if successful, error otherwise.
1361  */
1362 int tracing_snapshot_cond_disable(struct trace_array *tr)
1363 {
1364     int ret = 0;
1365 
1366     arch_spin_lock(&tr->max_lock);
1367 
1368     if (!tr->cond_snapshot)
1369         ret = -EINVAL;
1370     else {
1371         kfree(tr->cond_snapshot);
1372         tr->cond_snapshot = NULL;
1373     }
1374 
1375     arch_spin_unlock(&tr->max_lock);
1376 
1377     return ret;
1378 }
1379 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1380 #else
1381 void tracing_snapshot(void)
1382 {
1383     WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1384 }
1385 EXPORT_SYMBOL_GPL(tracing_snapshot);
1386 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1387 {
1388     WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1389 }
1390 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1391 int tracing_alloc_snapshot(void)
1392 {
1393     WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1394     return -ENODEV;
1395 }
1396 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1397 void tracing_snapshot_alloc(void)
1398 {
1399     /* Give warning */
1400     tracing_snapshot();
1401 }
1402 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1403 void *tracing_cond_snapshot_data(struct trace_array *tr)
1404 {
1405     return NULL;
1406 }
1407 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1408 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1409 {
1410     return -ENODEV;
1411 }
1412 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1413 int tracing_snapshot_cond_disable(struct trace_array *tr)
1414 {
1415     return false;
1416 }
1417 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1418 #endif /* CONFIG_TRACER_SNAPSHOT */
1419 
1420 void tracer_tracing_off(struct trace_array *tr)
1421 {
1422     if (tr->array_buffer.buffer)
1423         ring_buffer_record_off(tr->array_buffer.buffer);
1424     /*
1425      * This flag is looked at when buffers haven't been allocated
1426      * yet, or by some tracers (like irqsoff), that just want to
1427      * know if the ring buffer has been disabled, but it can handle
1428      * races of where it gets disabled but we still do a record.
1429      * As the check is in the fast path of the tracers, it is more
1430      * important to be fast than accurate.
1431      */
1432     tr->buffer_disabled = 1;
1433     /* Make the flag seen by readers */
1434     smp_wmb();
1435 }
1436 
1437 /**
1438  * tracing_off - turn off tracing buffers
1439  *
1440  * This function stops the tracing buffers from recording data.
1441  * It does not disable any overhead the tracers themselves may
1442  * be causing. This function simply causes all recording to
1443  * the ring buffers to fail.
1444  */
1445 void tracing_off(void)
1446 {
1447     tracer_tracing_off(&global_trace);
1448 }
1449 EXPORT_SYMBOL_GPL(tracing_off);
1450 
1451 void disable_trace_on_warning(void)
1452 {
1453     if (__disable_trace_on_warning) {
1454         trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1455             "Disabling tracing due to warning\n");
1456         tracing_off();
1457     }
1458 }
1459 
1460 /**
1461  * tracer_tracing_is_on - show real state of ring buffer enabled
1462  * @tr : the trace array to know if ring buffer is enabled
1463  *
1464  * Shows real state of the ring buffer if it is enabled or not.
1465  */
1466 bool tracer_tracing_is_on(struct trace_array *tr)
1467 {
1468     if (tr->array_buffer.buffer)
1469         return ring_buffer_record_is_on(tr->array_buffer.buffer);
1470     return !tr->buffer_disabled;
1471 }
1472 
1473 /**
1474  * tracing_is_on - show state of ring buffers enabled
1475  */
1476 int tracing_is_on(void)
1477 {
1478     return tracer_tracing_is_on(&global_trace);
1479 }
1480 EXPORT_SYMBOL_GPL(tracing_is_on);
1481 
1482 static int __init set_buf_size(char *str)
1483 {
1484     unsigned long buf_size;
1485 
1486     if (!str)
1487         return 0;
1488     buf_size = memparse(str, &str);
1489     /*
1490      * nr_entries can not be zero and the startup
1491      * tests require some buffer space. Therefore
1492      * ensure we have at least 4096 bytes of buffer.
1493      */
1494     trace_buf_size = max(4096UL, buf_size);
1495     return 1;
1496 }
1497 __setup("trace_buf_size=", set_buf_size);
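
/*
 * Since the value is parsed with memparse(), the usual size suffixes work
 * on the command line, e.g. (sketch):
 *
 *	trace_buf_size=1M
 *	trace_buf_size=4096
 *
 * Anything below 4096 bytes is raised to that minimum, as required by the
 * startup tests.
 */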
1498 
1499 static int __init set_tracing_thresh(char *str)
1500 {
1501     unsigned long threshold;
1502     int ret;
1503 
1504     if (!str)
1505         return 0;
1506     ret = kstrtoul(str, 0, &threshold);
1507     if (ret < 0)
1508         return 0;
1509     tracing_thresh = threshold * 1000;
1510     return 1;
1511 }
1512 __setup("tracing_thresh=", set_tracing_thresh);
1513 
1514 unsigned long nsecs_to_usecs(unsigned long nsecs)
1515 {
1516     return nsecs / 1000;
1517 }
1518 
1519 /*
1520  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1521  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1522  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1523  * of strings in the order that the evals (enum) were defined.
1524  */
1525 #undef C
1526 #define C(a, b) b
1527 
1528 /* These must match the bit positions in trace_iterator_flags */
1529 static const char *trace_options[] = {
1530     TRACE_FLAGS
1531     NULL
1532 };
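
/*
 * For example, an entry such as C(PRINT_PARENT, "print-parent") in
 * TRACE_FLAGS expands here to just the string "print-parent", so the
 * array above ends up listing the option names in enum-bit order,
 * terminated by the explicit NULL.
 */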
1533 
1534 static struct {
1535     u64 (*func)(void);
1536     const char *name;
1537     int in_ns;      /* is this clock in nanoseconds? */
1538 } trace_clocks[] = {
1539     { trace_clock_local,        "local",    1 },
1540     { trace_clock_global,       "global",   1 },
1541     { trace_clock_counter,      "counter",  0 },
1542     { trace_clock_jiffies,      "uptime",   0 },
1543     { trace_clock,          "perf",     1 },
1544     { ktime_get_mono_fast_ns,   "mono",     1 },
1545     { ktime_get_raw_fast_ns,    "mono_raw", 1 },
1546     { ktime_get_boot_fast_ns,   "boot",     1 },
1547     { ktime_get_tai_fast_ns,    "tai",      1 },
1548     ARCH_TRACE_CLOCKS
1549 };
1550 
1551 bool trace_clock_in_ns(struct trace_array *tr)
1552 {
1553     if (trace_clocks[tr->clock_id].in_ns)
1554         return true;
1555 
1556     return false;
1557 }
1558 
1559 /*
1560  * trace_parser_get_init - gets the buffer for trace parser
1561  */
1562 int trace_parser_get_init(struct trace_parser *parser, int size)
1563 {
1564     memset(parser, 0, sizeof(*parser));
1565 
1566     parser->buffer = kmalloc(size, GFP_KERNEL);
1567     if (!parser->buffer)
1568         return 1;
1569 
1570     parser->size = size;
1571     return 0;
1572 }
1573 
1574 /*
1575  * trace_parser_put - frees the buffer for trace parser
1576  */
1577 void trace_parser_put(struct trace_parser *parser)
1578 {
1579     kfree(parser->buffer);
1580     parser->buffer = NULL;
1581 }
1582 
1583 /*
0584  * trace_get_user - reads the user input string separated by space
1585  * (matched by isspace(ch))
1586  *
1587  * For each string found the 'struct trace_parser' is updated,
1588  * and the function returns.
1589  *
1590  * Returns number of bytes read.
1591  *
1592  * See kernel/trace/trace.h for 'struct trace_parser' details.
1593  */
1594 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1595     size_t cnt, loff_t *ppos)
1596 {
1597     char ch;
1598     size_t read = 0;
1599     ssize_t ret;
1600 
1601     if (!*ppos)
1602         trace_parser_clear(parser);
1603 
1604     ret = get_user(ch, ubuf++);
1605     if (ret)
1606         goto out;
1607 
1608     read++;
1609     cnt--;
1610 
1611     /*
1612      * The parser is not finished with the last write,
1613      * continue reading the user input without skipping spaces.
1614      */
1615     if (!parser->cont) {
1616         /* skip white space */
1617         while (cnt && isspace(ch)) {
1618             ret = get_user(ch, ubuf++);
1619             if (ret)
1620                 goto out;
1621             read++;
1622             cnt--;
1623         }
1624 
1625         parser->idx = 0;
1626 
1627         /* only spaces were written */
1628         if (isspace(ch) || !ch) {
1629             *ppos += read;
1630             ret = read;
1631             goto out;
1632         }
1633     }
1634 
1635     /* read the non-space input */
1636     while (cnt && !isspace(ch) && ch) {
1637         if (parser->idx < parser->size - 1)
1638             parser->buffer[parser->idx++] = ch;
1639         else {
1640             ret = -EINVAL;
1641             goto out;
1642         }
1643         ret = get_user(ch, ubuf++);
1644         if (ret)
1645             goto out;
1646         read++;
1647         cnt--;
1648     }
1649 
1650     /* We either got finished input or we have to wait for another call. */
1651     if (isspace(ch) || !ch) {
1652         parser->buffer[parser->idx] = 0;
1653         parser->cont = false;
1654     } else if (parser->idx < parser->size - 1) {
1655         parser->cont = true;
1656         parser->buffer[parser->idx++] = ch;
1657         /* Make sure the parsed string always terminates with '\0'. */
1658         parser->buffer[parser->idx] = 0;
1659     } else {
1660         ret = -EINVAL;
1661         goto out;
1662     }
1663 
1664     *ppos += read;
1665     ret = read;
1666 
1667 out:
1668     return ret;
1669 }
1670 
1671 /* TODO add a seq_buf_to_buffer() */
1672 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1673 {
1674     int len;
1675 
1676     if (trace_seq_used(s) <= s->seq.readpos)
1677         return -EBUSY;
1678 
1679     len = trace_seq_used(s) - s->seq.readpos;
1680     if (cnt > len)
1681         cnt = len;
1682     memcpy(buf, s->buffer + s->seq.readpos, cnt);
1683 
1684     s->seq.readpos += cnt;
1685     return cnt;
1686 }
1687 
1688 unsigned long __read_mostly tracing_thresh;
1689 static const struct file_operations tracing_max_lat_fops;
1690 
1691 #ifdef LATENCY_FS_NOTIFY
1692 
1693 static struct workqueue_struct *fsnotify_wq;
1694 
1695 static void latency_fsnotify_workfn(struct work_struct *work)
1696 {
1697     struct trace_array *tr = container_of(work, struct trace_array,
1698                           fsnotify_work);
1699     fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1700 }
1701 
1702 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1703 {
1704     struct trace_array *tr = container_of(iwork, struct trace_array,
1705                           fsnotify_irqwork);
1706     queue_work(fsnotify_wq, &tr->fsnotify_work);
1707 }
1708 
1709 static void trace_create_maxlat_file(struct trace_array *tr,
1710                      struct dentry *d_tracer)
1711 {
1712     INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1713     init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1714     tr->d_max_latency = trace_create_file("tracing_max_latency",
1715                           TRACE_MODE_WRITE,
1716                           d_tracer, &tr->max_latency,
1717                           &tracing_max_lat_fops);
1718 }
1719 
1720 __init static int latency_fsnotify_init(void)
1721 {
1722     fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1723                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1724     if (!fsnotify_wq) {
1725         pr_err("Unable to allocate tr_max_lat_wq\n");
1726         return -ENOMEM;
1727     }
1728     return 0;
1729 }
1730 
1731 late_initcall_sync(latency_fsnotify_init);
1732 
1733 void latency_fsnotify(struct trace_array *tr)
1734 {
1735     if (!fsnotify_wq)
1736         return;
1737     /*
1738      * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1739      * possible that we are called from __schedule() or do_idle(), which
1740      * could cause a deadlock.
1741      */
1742     irq_work_queue(&tr->fsnotify_irqwork);
1743 }
1744 
1745 #elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)  \
1746     || defined(CONFIG_OSNOISE_TRACER)
1747 
1748 #define trace_create_maxlat_file(tr, d_tracer)              \
1749     trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,  \
1750               d_tracer, &tr->max_latency, &tracing_max_lat_fops)
1751 
1752 #else
1753 #define trace_create_maxlat_file(tr, d_tracer)   do { } while (0)
1754 #endif
1755 
1756 #ifdef CONFIG_TRACER_MAX_TRACE
1757 /*
1758  * Copy the new maximum trace into the separate maximum-trace
1759  * structure. (this way the maximum trace is permanently saved,
1760  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1761  */
1762 static void
1763 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1764 {
1765     struct array_buffer *trace_buf = &tr->array_buffer;
1766     struct array_buffer *max_buf = &tr->max_buffer;
1767     struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1768     struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1769 
1770     max_buf->cpu = cpu;
1771     max_buf->time_start = data->preempt_timestamp;
1772 
1773     max_data->saved_latency = tr->max_latency;
1774     max_data->critical_start = data->critical_start;
1775     max_data->critical_end = data->critical_end;
1776 
1777     strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1778     max_data->pid = tsk->pid;
1779     /*
1780      * If tsk == current, then use current_uid(), as that does not use
1781      * RCU. The irq tracer can be called out of RCU scope.
1782      */
1783     if (tsk == current)
1784         max_data->uid = current_uid();
1785     else
1786         max_data->uid = task_uid(tsk);
1787 
1788     max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1789     max_data->policy = tsk->policy;
1790     max_data->rt_priority = tsk->rt_priority;
1791 
1792     /* record this task's comm */
1793     tracing_record_cmdline(tsk);
1794     latency_fsnotify(tr);
1795 }
1796 
1797 /**
1798  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1799  * @tr: tracer
1800  * @tsk: the task with the latency
1801  * @cpu: The cpu that initiated the trace.
1802  * @cond_data: User data associated with a conditional snapshot
1803  *
1804  * Flip the buffers between the @tr and the max_tr and record information
1805  * about which task was the cause of this latency.
1806  */
1807 void
1808 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1809           void *cond_data)
1810 {
1811     if (tr->stop_count)
1812         return;
1813 
1814     WARN_ON_ONCE(!irqs_disabled());
1815 
1816     if (!tr->allocated_snapshot) {
1817         /* Only the nop tracer should hit this when disabling */
1818         WARN_ON_ONCE(tr->current_trace != &nop_trace);
1819         return;
1820     }
1821 
1822     arch_spin_lock(&tr->max_lock);
1823 
1824     /* Inherit the recordable setting from array_buffer */
1825     if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1826         ring_buffer_record_on(tr->max_buffer.buffer);
1827     else
1828         ring_buffer_record_off(tr->max_buffer.buffer);
1829 
1830 #ifdef CONFIG_TRACER_SNAPSHOT
1831     if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1832         goto out_unlock;
1833 #endif
1834     swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1835 
1836     __update_max_tr(tr, tsk, cpu);
1837 
1838  out_unlock:
1839     arch_spin_unlock(&tr->max_lock);
1840 }
1841 
1842 /**
1843  * update_max_tr_single - only copy one trace over, and reset the rest
1844  * @tr: tracer
1845  * @tsk: task with the latency
1846  * @cpu: the cpu of the buffer to copy.
1847  *
1848  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1849  */
1850 void
1851 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1852 {
1853     int ret;
1854 
1855     if (tr->stop_count)
1856         return;
1857 
1858     WARN_ON_ONCE(!irqs_disabled());
1859     if (!tr->allocated_snapshot) {
1860         /* Only the nop tracer should hit this when disabling */
1861         WARN_ON_ONCE(tr->current_trace != &nop_trace);
1862         return;
1863     }
1864 
1865     arch_spin_lock(&tr->max_lock);
1866 
1867     ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1868 
1869     if (ret == -EBUSY) {
1870         /*
1871          * We failed to swap the buffer due to a commit taking
1872          * place on this CPU. We fail to record, but we reset
1873          * the max trace buffer (no one writes directly to it)
1874          * and flag that it failed.
1875          */
1876         trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1877             "Failed to swap buffers due to commit in progress\n");
1878     }
1879 
1880     WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1881 
1882     __update_max_tr(tr, tsk, cpu);
1883     arch_spin_unlock(&tr->max_lock);
1884 }
1885 #endif /* CONFIG_TRACER_MAX_TRACE */
1886 
1887 static int wait_on_pipe(struct trace_iterator *iter, int full)
1888 {
1889     /* Iterators are static; they should be filled or empty */
1890     if (trace_buffer_iter(iter, iter->cpu_file))
1891         return 0;
1892 
1893     return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1894                 full);
1895 }
1896 
1897 #ifdef CONFIG_FTRACE_STARTUP_TEST
1898 static bool selftests_can_run;
1899 
1900 struct trace_selftests {
1901     struct list_head        list;
1902     struct tracer           *type;
1903 };
1904 
1905 static LIST_HEAD(postponed_selftests);
1906 
1907 static int save_selftest(struct tracer *type)
1908 {
1909     struct trace_selftests *selftest;
1910 
1911     selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1912     if (!selftest)
1913         return -ENOMEM;
1914 
1915     selftest->type = type;
1916     list_add(&selftest->list, &postponed_selftests);
1917     return 0;
1918 }
1919 
1920 static int run_tracer_selftest(struct tracer *type)
1921 {
1922     struct trace_array *tr = &global_trace;
1923     struct tracer *saved_tracer = tr->current_trace;
1924     int ret;
1925 
1926     if (!type->selftest || tracing_selftest_disabled)
1927         return 0;
1928 
1929     /*
1930      * If a tracer registers early in boot up (before scheduling is
1931      * initialized and such), then do not run its selftests yet.
1932      * Instead, run it a little later in the boot process.
1933      */
1934     if (!selftests_can_run)
1935         return save_selftest(type);
1936 
1937     if (!tracing_is_on()) {
1938         pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1939             type->name);
1940         return 0;
1941     }
1942 
1943     /*
1944      * Run a selftest on this tracer.
1945      * Here we reset the trace buffer, and set the current
1946      * tracer to be this tracer. The tracer can then run some
1947      * internal tracing to verify that everything is in order.
1948      * If we fail, we do not register this tracer.
1949      */
1950     tracing_reset_online_cpus(&tr->array_buffer);
1951 
1952     tr->current_trace = type;
1953 
1954 #ifdef CONFIG_TRACER_MAX_TRACE
1955     if (type->use_max_tr) {
1956         /* If we expanded the buffers, make sure the max is expanded too */
1957         if (ring_buffer_expanded)
1958             ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1959                        RING_BUFFER_ALL_CPUS);
1960         tr->allocated_snapshot = true;
1961     }
1962 #endif
1963 
1964     /* the test is responsible for initializing and enabling */
1965     pr_info("Testing tracer %s: ", type->name);
1966     ret = type->selftest(type, tr);
1967     /* the test is responsible for resetting too */
1968     tr->current_trace = saved_tracer;
1969     if (ret) {
1970         printk(KERN_CONT "FAILED!\n");
1971         /* Add the warning after printing 'FAILED' */
1972         WARN_ON(1);
1973         return -1;
1974     }
1975     /* Only reset on passing, to avoid touching corrupted buffers */
1976     tracing_reset_online_cpus(&tr->array_buffer);
1977 
1978 #ifdef CONFIG_TRACER_MAX_TRACE
1979     if (type->use_max_tr) {
1980         tr->allocated_snapshot = false;
1981 
1982         /* Shrink the max buffer again */
1983         if (ring_buffer_expanded)
1984             ring_buffer_resize(tr->max_buffer.buffer, 1,
1985                        RING_BUFFER_ALL_CPUS);
1986     }
1987 #endif
1988 
1989     printk(KERN_CONT "PASSED\n");
1990     return 0;
1991 }
1992 
1993 static __init int init_trace_selftests(void)
1994 {
1995     struct trace_selftests *p, *n;
1996     struct tracer *t, **last;
1997     int ret;
1998 
1999     selftests_can_run = true;
2000 
2001     mutex_lock(&trace_types_lock);
2002 
2003     if (list_empty(&postponed_selftests))
2004         goto out;
2005 
2006     pr_info("Running postponed tracer tests:\n");
2007 
2008     tracing_selftest_running = true;
2009     list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2010         /* This loop can take minutes when sanitizers are enabled, so
2011          * let's make sure we allow RCU processing.
2012          */
2013         cond_resched();
2014         ret = run_tracer_selftest(p->type);
2015         /* If the test fails, then warn and remove from available_tracers */
2016         if (ret < 0) {
2017             WARN(1, "tracer: %s failed selftest, disabling\n",
2018                  p->type->name);
2019             last = &trace_types;
2020             for (t = trace_types; t; t = t->next) {
2021                 if (t == p->type) {
2022                     *last = t->next;
2023                     break;
2024                 }
2025                 last = &t->next;
2026             }
2027         }
2028         list_del(&p->list);
2029         kfree(p);
2030     }
2031     tracing_selftest_running = false;
2032 
2033  out:
2034     mutex_unlock(&trace_types_lock);
2035 
2036     return 0;
2037 }
2038 core_initcall(init_trace_selftests);
2039 #else
2040 static inline int run_tracer_selftest(struct tracer *type)
2041 {
2042     return 0;
2043 }
2044 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2045 
2046 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2047 
2048 static void __init apply_trace_boot_options(void);
2049 
2050 /**
2051  * register_tracer - register a tracer with the ftrace system.
2052  * @type: the plugin for the tracer
2053  *
2054  * Register a new plugin tracer.
2055  */
2056 int __init register_tracer(struct tracer *type)
2057 {
2058     struct tracer *t;
2059     int ret = 0;
2060 
2061     if (!type->name) {
2062         pr_info("Tracer must have a name\n");
2063         return -1;
2064     }
2065 
2066     if (strlen(type->name) >= MAX_TRACER_SIZE) {
2067         pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2068         return -1;
2069     }
2070 
2071     if (security_locked_down(LOCKDOWN_TRACEFS)) {
2072         pr_warn("Can not register tracer %s due to lockdown\n",
2073                type->name);
2074         return -EPERM;
2075     }
2076 
2077     mutex_lock(&trace_types_lock);
2078 
2079     tracing_selftest_running = true;
2080 
2081     for (t = trace_types; t; t = t->next) {
2082         if (strcmp(type->name, t->name) == 0) {
2083             /* already found */
2084             pr_info("Tracer %s already registered\n",
2085                 type->name);
2086             ret = -1;
2087             goto out;
2088         }
2089     }
2090 
2091     if (!type->set_flag)
2092         type->set_flag = &dummy_set_flag;
2093     if (!type->flags) {
2094         /* allocate a dummy tracer_flags */
2095         type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2096         if (!type->flags) {
2097             ret = -ENOMEM;
2098             goto out;
2099         }
2100         type->flags->val = 0;
2101         type->flags->opts = dummy_tracer_opt;
2102     } else
2103         if (!type->flags->opts)
2104             type->flags->opts = dummy_tracer_opt;
2105 
2106     /* store the tracer for __set_tracer_option */
2107     type->flags->trace = type;
2108 
2109     ret = run_tracer_selftest(type);
2110     if (ret < 0)
2111         goto out;
2112 
2113     type->next = trace_types;
2114     trace_types = type;
2115     add_tracer_options(&global_trace, type);
2116 
2117  out:
2118     tracing_selftest_running = false;
2119     mutex_unlock(&trace_types_lock);
2120 
2121     if (ret || !default_bootup_tracer)
2122         goto out_unlock;
2123 
2124     if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2125         goto out_unlock;
2126 
2127     printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2128     /* Do we want this tracer to start on bootup? */
2129     tracing_set_tracer(&global_trace, type->name);
2130     default_bootup_tracer = NULL;
2131 
2132     apply_trace_boot_options();
2133 
2134     /* Disable the remaining selftests, since they would interfere with the tracer now running. */
2135     disable_tracing_selftest("running a tracer");
2136 
2137  out_unlock:
2138     return ret;
2139 }
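/*
 * Minimal registration sketch, with a made-up "mytrace" tracer whose
 * mytrace_init()/mytrace_reset() handlers are assumed to be defined
 * elsewhere; it only illustrates the fields register_tracer() relies on:
 *
 *	static struct tracer mytrace_tracer __read_mostly = {
 *		.name	= "mytrace",
 *		.init	= mytrace_init,
 *		.reset	= mytrace_reset,
 *	};
 *
 *	static __init int init_mytrace(void)
 *	{
 *		return register_tracer(&mytrace_tracer);
 *	}
 *	core_initcall(init_mytrace);
 */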
2140 
2141 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2142 {
2143     struct trace_buffer *buffer = buf->buffer;
2144 
2145     if (!buffer)
2146         return;
2147 
2148     ring_buffer_record_disable(buffer);
2149 
2150     /* Make sure all commits have finished */
2151     synchronize_rcu();
2152     ring_buffer_reset_cpu(buffer, cpu);
2153 
2154     ring_buffer_record_enable(buffer);
2155 }
2156 
2157 void tracing_reset_online_cpus(struct array_buffer *buf)
2158 {
2159     struct trace_buffer *buffer = buf->buffer;
2160 
2161     if (!buffer)
2162         return;
2163 
2164     ring_buffer_record_disable(buffer);
2165 
2166     /* Make sure all commits have finished */
2167     synchronize_rcu();
2168 
2169     buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2170 
2171     ring_buffer_reset_online_cpus(buffer);
2172 
2173     ring_buffer_record_enable(buffer);
2174 }
2175 
2176 /* Must have trace_types_lock held */
2177 void tracing_reset_all_online_cpus(void)
2178 {
2179     struct trace_array *tr;
2180 
2181     list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2182         if (!tr->clear_trace)
2183             continue;
2184         tr->clear_trace = false;
2185         tracing_reset_online_cpus(&tr->array_buffer);
2186 #ifdef CONFIG_TRACER_MAX_TRACE
2187         tracing_reset_online_cpus(&tr->max_buffer);
2188 #endif
2189     }
2190 }
2191 
2192 /*
2193  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2194  * is the tgid last observed corresponding to pid=i.
2195  */
2196 static int *tgid_map;
2197 
2198 /* The maximum valid index into tgid_map. */
2199 static size_t tgid_map_max;
2200 
2201 #define SAVED_CMDLINES_DEFAULT 128
2202 #define NO_CMDLINE_MAP UINT_MAX
2203 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2204 struct saved_cmdlines_buffer {
2205     unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2206     unsigned *map_cmdline_to_pid;
2207     unsigned cmdline_num;
2208     int cmdline_idx;
2209     char *saved_cmdlines;
2210 };
2211 static struct saved_cmdlines_buffer *savedcmd;
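/*
 * savedcmd acts as a small two-way cache: map_pid_to_cmdline[] maps a
 * (masked) pid to a slot index, map_cmdline_to_pid[] maps that slot back
 * to the pid that owns it so stale entries can be detected, and
 * saved_cmdlines holds the comm strings, TASK_COMM_LEN bytes per slot.
 */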
2212 
2213 static inline char *get_saved_cmdlines(int idx)
2214 {
2215     return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2216 }
2217 
2218 static inline void set_cmdline(int idx, const char *cmdline)
2219 {
2220     strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2221 }
2222 
2223 static int allocate_cmdlines_buffer(unsigned int val,
2224                     struct saved_cmdlines_buffer *s)
2225 {
2226     s->map_cmdline_to_pid = kmalloc_array(val,
2227                           sizeof(*s->map_cmdline_to_pid),
2228                           GFP_KERNEL);
2229     if (!s->map_cmdline_to_pid)
2230         return -ENOMEM;
2231 
2232     s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2233     if (!s->saved_cmdlines) {
2234         kfree(s->map_cmdline_to_pid);
2235         return -ENOMEM;
2236     }
2237 
2238     s->cmdline_idx = 0;
2239     s->cmdline_num = val;
2240     memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2241            sizeof(s->map_pid_to_cmdline));
2242     memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2243            val * sizeof(*s->map_cmdline_to_pid));
2244 
2245     return 0;
2246 }
2247 
2248 static int trace_create_savedcmd(void)
2249 {
2250     int ret;
2251 
2252     savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2253     if (!savedcmd)
2254         return -ENOMEM;
2255 
2256     ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2257     if (ret < 0) {
2258         kfree(savedcmd);
2259         savedcmd = NULL;
2260         return -ENOMEM;
2261     }
2262 
2263     return 0;
2264 }
2265 
2266 int is_tracing_stopped(void)
2267 {
2268     return global_trace.stop_count;
2269 }
2270 
2271 /**
2272  * tracing_start - quick start of the tracer
2273  *
2274  * If tracing is enabled but was stopped by tracing_stop,
2275  * this will start the tracer back up.
2276  */
2277 void tracing_start(void)
2278 {
2279     struct trace_buffer *buffer;
2280     unsigned long flags;
2281 
2282     if (tracing_disabled)
2283         return;
2284 
2285     raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2286     if (--global_trace.stop_count) {
2287         if (global_trace.stop_count < 0) {
2288             /* Someone screwed up their debugging */
2289             WARN_ON_ONCE(1);
2290             global_trace.stop_count = 0;
2291         }
2292         goto out;
2293     }
2294 
2295     /* Prevent the buffers from switching */
2296     arch_spin_lock(&global_trace.max_lock);
2297 
2298     buffer = global_trace.array_buffer.buffer;
2299     if (buffer)
2300         ring_buffer_record_enable(buffer);
2301 
2302 #ifdef CONFIG_TRACER_MAX_TRACE
2303     buffer = global_trace.max_buffer.buffer;
2304     if (buffer)
2305         ring_buffer_record_enable(buffer);
2306 #endif
2307 
2308     arch_spin_unlock(&global_trace.max_lock);
2309 
2310  out:
2311     raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2312 }
2313 
2314 static void tracing_start_tr(struct trace_array *tr)
2315 {
2316     struct trace_buffer *buffer;
2317     unsigned long flags;
2318 
2319     if (tracing_disabled)
2320         return;
2321 
2322     /* If global, we need to also start the max tracer */
2323     if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2324         return tracing_start();
2325 
2326     raw_spin_lock_irqsave(&tr->start_lock, flags);
2327 
2328     if (--tr->stop_count) {
2329         if (tr->stop_count < 0) {
2330             /* Someone screwed up their debugging */
2331             WARN_ON_ONCE(1);
2332             tr->stop_count = 0;
2333         }
2334         goto out;
2335     }
2336 
2337     buffer = tr->array_buffer.buffer;
2338     if (buffer)
2339         ring_buffer_record_enable(buffer);
2340 
2341  out:
2342     raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2343 }
2344 
2345 /**
2346  * tracing_stop - quick stop of the tracer
2347  *
2348  * Lightweight way to stop tracing. Use in conjunction with
2349  * tracing_start.
2350  */
2351 void tracing_stop(void)
2352 {
2353     struct trace_buffer *buffer;
2354     unsigned long flags;
2355 
2356     raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2357     if (global_trace.stop_count++)
2358         goto out;
2359 
2360     /* Prevent the buffers from switching */
2361     arch_spin_lock(&global_trace.max_lock);
2362 
2363     buffer = global_trace.array_buffer.buffer;
2364     if (buffer)
2365         ring_buffer_record_disable(buffer);
2366 
2367 #ifdef CONFIG_TRACER_MAX_TRACE
2368     buffer = global_trace.max_buffer.buffer;
2369     if (buffer)
2370         ring_buffer_record_disable(buffer);
2371 #endif
2372 
2373     arch_spin_unlock(&global_trace.max_lock);
2374 
2375  out:
2376     raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2377 }
2378 
2379 static void tracing_stop_tr(struct trace_array *tr)
2380 {
2381     struct trace_buffer *buffer;
2382     unsigned long flags;
2383 
2384     /* If global, we need to also stop the max tracer */
2385     if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2386         return tracing_stop();
2387 
2388     raw_spin_lock_irqsave(&tr->start_lock, flags);
2389     if (tr->stop_count++)
2390         goto out;
2391 
2392     buffer = tr->array_buffer.buffer;
2393     if (buffer)
2394         ring_buffer_record_disable(buffer);
2395 
2396  out:
2397     raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2398 }
2399 
2400 static int trace_save_cmdline(struct task_struct *tsk)
2401 {
2402     unsigned tpid, idx;
2403 
2404     /* treat recording of idle task as a success */
2405     if (!tsk->pid)
2406         return 1;
2407 
2408     tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2409 
2410     /*
2411      * It's not the end of the world if we don't get
2412      * the lock, but we also don't want to spin
2413      * nor do we want to disable interrupts,
2414      * so if we miss here, then better luck next time.
2415      */
2416     if (!arch_spin_trylock(&trace_cmdline_lock))
2417         return 0;
2418 
2419     idx = savedcmd->map_pid_to_cmdline[tpid];
2420     if (idx == NO_CMDLINE_MAP) {
2421         idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2422 
2423         savedcmd->map_pid_to_cmdline[tpid] = idx;
2424         savedcmd->cmdline_idx = idx;
2425     }
2426 
2427     savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2428     set_cmdline(idx, tsk->comm);
2429 
2430     arch_spin_unlock(&trace_cmdline_lock);
2431 
2432     return 1;
2433 }
2434 
2435 static void __trace_find_cmdline(int pid, char comm[])
2436 {
2437     unsigned map;
2438     int tpid;
2439 
2440     if (!pid) {
2441         strcpy(comm, "<idle>");
2442         return;
2443     }
2444 
2445     if (WARN_ON_ONCE(pid < 0)) {
2446         strcpy(comm, "<XXX>");
2447         return;
2448     }
2449 
2450     tpid = pid & (PID_MAX_DEFAULT - 1);
2451     map = savedcmd->map_pid_to_cmdline[tpid];
2452     if (map != NO_CMDLINE_MAP) {
2453         tpid = savedcmd->map_cmdline_to_pid[map];
2454         if (tpid == pid) {
2455             strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2456             return;
2457         }
2458     }
2459     strcpy(comm, "<...>");
2460 }
2461 
2462 void trace_find_cmdline(int pid, char comm[])
2463 {
2464     preempt_disable();
2465     arch_spin_lock(&trace_cmdline_lock);
2466 
2467     __trace_find_cmdline(pid, comm);
2468 
2469     arch_spin_unlock(&trace_cmdline_lock);
2470     preempt_enable();
2471 }
2472 
2473 static int *trace_find_tgid_ptr(int pid)
2474 {
2475     /*
2476      * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2477      * if we observe a non-NULL tgid_map then we also observe the correct
2478      * tgid_map_max.
2479      */
2480     int *map = smp_load_acquire(&tgid_map);
2481 
2482     if (unlikely(!map || pid > tgid_map_max))
2483         return NULL;
2484 
2485     return &map[pid];
2486 }
2487 
2488 int trace_find_tgid(int pid)
2489 {
2490     int *ptr = trace_find_tgid_ptr(pid);
2491 
2492     return ptr ? *ptr : 0;
2493 }
2494 
2495 static int trace_save_tgid(struct task_struct *tsk)
2496 {
2497     int *ptr;
2498 
2499     /* treat recording of idle task as a success */
2500     if (!tsk->pid)
2501         return 1;
2502 
2503     ptr = trace_find_tgid_ptr(tsk->pid);
2504     if (!ptr)
2505         return 0;
2506 
2507     *ptr = tsk->tgid;
2508     return 1;
2509 }
2510 
2511 static bool tracing_record_taskinfo_skip(int flags)
2512 {
2513     if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2514         return true;
2515     if (!__this_cpu_read(trace_taskinfo_save))
2516         return true;
2517     return false;
2518 }
2519 
2520 /**
2521  * tracing_record_taskinfo - record the task info of a task
2522  *
2523  * @task:  task to record
2524  * @flags: TRACE_RECORD_CMDLINE for recording comm
2525  *         TRACE_RECORD_TGID for recording tgid
2526  */
2527 void tracing_record_taskinfo(struct task_struct *task, int flags)
2528 {
2529     bool done;
2530 
2531     if (tracing_record_taskinfo_skip(flags))
2532         return;
2533 
2534     /*
2535      * Record as much task information as possible. If some fail, continue
2536      * to try to record the others.
2537      */
2538     done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2539     done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2540 
2541     /* If recording any information failed, try again soon. */
2542     if (!done)
2543         return;
2544 
2545     __this_cpu_write(trace_taskinfo_save, false);
2546 }
2547 
2548 /**
2549  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2550  *
2551  * @prev: previous task during sched_switch
2552  * @next: next task during sched_switch
2553  * @flags: TRACE_RECORD_CMDLINE for recording comm
2554  *         TRACE_RECORD_TGID for recording tgid
2555  */
2556 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2557                       struct task_struct *next, int flags)
2558 {
2559     bool done;
2560 
2561     if (tracing_record_taskinfo_skip(flags))
2562         return;
2563 
2564     /*
2565      * Record as much task information as possible. If some fail, continue
2566      * to try to record the others.
2567      */
2568     done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2569     done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2570     done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2571     done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2572 
2573     /* If recording any information failed, try again soon. */
2574     if (!done)
2575         return;
2576 
2577     __this_cpu_write(trace_taskinfo_save, false);
2578 }
2579 
2580 /* Helpers to record a specific task information */
2581 void tracing_record_cmdline(struct task_struct *task)
2582 {
2583     tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2584 }
2585 
2586 void tracing_record_tgid(struct task_struct *task)
2587 {
2588     tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2589 }
2590 
2591 /*
2592  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2593  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2594  * simplifies those functions and keeps them in sync.
2595  */
2596 enum print_line_t trace_handle_return(struct trace_seq *s)
2597 {
2598     return trace_seq_has_overflowed(s) ?
2599         TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2600 }
2601 EXPORT_SYMBOL_GPL(trace_handle_return);
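/*
 * Typical use, sketched with a hypothetical output callback (the names
 * are illustrative only):
 *
 *	static enum print_line_t foo_trace_output(struct trace_iterator *iter,
 *						  int flags,
 *						  struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "foo fired\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */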
2602 
2603 static unsigned short migration_disable_value(void)
2604 {
2605 #if defined(CONFIG_SMP)
2606     return current->migration_disabled;
2607 #else
2608     return 0;
2609 #endif
2610 }
2611 
2612 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2613 {
2614     unsigned int trace_flags = irqs_status;
2615     unsigned int pc;
2616 
2617     pc = preempt_count();
2618 
2619     if (pc & NMI_MASK)
2620         trace_flags |= TRACE_FLAG_NMI;
2621     if (pc & HARDIRQ_MASK)
2622         trace_flags |= TRACE_FLAG_HARDIRQ;
2623     if (in_serving_softirq())
2624         trace_flags |= TRACE_FLAG_SOFTIRQ;
2625     if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2626         trace_flags |= TRACE_FLAG_BH_OFF;
2627 
2628     if (tif_need_resched())
2629         trace_flags |= TRACE_FLAG_NEED_RESCHED;
2630     if (test_preempt_need_resched())
2631         trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2632     return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2633         (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2634 }
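/*
 * Layout of the value built above, read directly from the return
 * expression: bits 0-3 hold the preempt count (clamped to 0xf), bits 4-7
 * hold the migration-disable count (also clamped), bits 8-15 are unused
 * here, and the TRACE_FLAG_* bits occupy bits 16 and up.
 */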
2635 
2636 struct ring_buffer_event *
2637 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2638               int type,
2639               unsigned long len,
2640               unsigned int trace_ctx)
2641 {
2642     return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2643 }
2644 
2645 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2646 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2647 static int trace_buffered_event_ref;
2648 
2649 /**
2650  * trace_buffered_event_enable - enable buffering events
2651  *
2652  * When events are being filtered, it is quicker to write the event
2653  * data into a temporary buffer when there is a good chance that the
2654  * event will not be committed. Discarding an event from the ring
2655  * buffer is not as fast as committing one, and is much slower than
2656  * copying the data and committing only on a match.
2657  *
2658  * When events are to be filtered, allocate per-CPU buffers to write
2659  * the event data into. If the event is filtered, it is simply
2660  * dropped; otherwise, the entire data is committed to the ring
2661  * buffer in one shot.
2662  */
2663 void trace_buffered_event_enable(void)
2664 {
2665     struct ring_buffer_event *event;
2666     struct page *page;
2667     int cpu;
2668 
2669     WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2670 
2671     if (trace_buffered_event_ref++)
2672         return;
2673 
2674     for_each_tracing_cpu(cpu) {
2675         page = alloc_pages_node(cpu_to_node(cpu),
2676                     GFP_KERNEL | __GFP_NORETRY, 0);
2677         if (!page)
2678             goto failed;
2679 
2680         event = page_address(page);
2681         memset(event, 0, sizeof(*event));
2682 
2683         per_cpu(trace_buffered_event, cpu) = event;
2684 
2685         preempt_disable();
2686         if (cpu == smp_processor_id() &&
2687             __this_cpu_read(trace_buffered_event) !=
2688             per_cpu(trace_buffered_event, cpu))
2689             WARN_ON_ONCE(1);
2690         preempt_enable();
2691     }
2692 
2693     return;
2694  failed:
2695     trace_buffered_event_disable();
2696 }
2697 
2698 static void enable_trace_buffered_event(void *data)
2699 {
2700     /* Probably not needed, but do it anyway */
2701     smp_rmb();
2702     this_cpu_dec(trace_buffered_event_cnt);
2703 }
2704 
2705 static void disable_trace_buffered_event(void *data)
2706 {
2707     this_cpu_inc(trace_buffered_event_cnt);
2708 }
2709 
2710 /**
2711  * trace_buffered_event_disable - disable buffering events
2712  *
2713  * When a filter is removed, it is faster to not use the buffered
2714  * events, and to commit directly into the ring buffer. Free up
2715  * the temp buffers when there are no more users. This requires
2716  * special synchronization with current events.
2717  */
2718 void trace_buffered_event_disable(void)
2719 {
2720     int cpu;
2721 
2722     WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2723 
2724     if (WARN_ON_ONCE(!trace_buffered_event_ref))
2725         return;
2726 
2727     if (--trace_buffered_event_ref)
2728         return;
2729 
2730     preempt_disable();
2731     /* For each CPU, mark the buffered event as busy so writers bypass it. */
2732     smp_call_function_many(tracing_buffer_mask,
2733                    disable_trace_buffered_event, NULL, 1);
2734     preempt_enable();
2735 
2736     /* Wait for all current users to finish */
2737     synchronize_rcu();
2738 
2739     for_each_tracing_cpu(cpu) {
2740         free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2741         per_cpu(trace_buffered_event, cpu) = NULL;
2742     }
2743     /*
2744      * Make sure trace_buffered_event is NULL before clearing
2745      * trace_buffered_event_cnt.
2746      */
2747     smp_wmb();
2748 
2749     preempt_disable();
2750     /* Drop the earlier per-CPU increment now that the buffers are gone */
2751     smp_call_function_many(tracing_buffer_mask,
2752                    enable_trace_buffered_event, NULL, 1);
2753     preempt_enable();
2754 }
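/*
 * The teardown above is ordered deliberately: every CPU first bumps
 * trace_buffered_event_cnt so new events fall back to the ring buffer,
 * synchronize_rcu() then waits out any writer still using a buffered
 * event, only after that are the per-CPU pages freed and cleared, and
 * the counts are dropped again once the NULL pointers are visible.
 */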
2755 
2756 static struct trace_buffer *temp_buffer;
2757 
2758 struct ring_buffer_event *
2759 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2760               struct trace_event_file *trace_file,
2761               int type, unsigned long len,
2762               unsigned int trace_ctx)
2763 {
2764     struct ring_buffer_event *entry;
2765     struct trace_array *tr = trace_file->tr;
2766     int val;
2767 
2768     *current_rb = tr->array_buffer.buffer;
2769 
2770     if (!tr->no_filter_buffering_ref &&
2771         (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2772         preempt_disable_notrace();
2773         /*
2774          * Filtering is on, so try to use the per cpu buffer first.
2775          * This buffer will simulate a ring_buffer_event,
2776          * where the type_len is zero and the array[0] will
2777          * hold the full length.
2778          * (see include/linux/ring_buffer.h for details on
2779          *  how the ring_buffer_event is structured).
2780          *
2781          * Using a temp buffer during filtering and copying it
2782          * on a matched filter is quicker than writing directly
2783          * into the ring buffer and then discarding it when
2784          * it doesn't match. That is because the discard
2785          * requires several atomic operations to get right.
2786          * Copying on match and doing nothing on a failed match
2787          * is still quicker than no copy on match, but having
2788          * to discard out of the ring buffer on a failed match.
2789          */
2790         if ((entry = __this_cpu_read(trace_buffered_event))) {
2791             int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2792 
2793             val = this_cpu_inc_return(trace_buffered_event_cnt);
2794 
2795             /*
2796              * Preemption is disabled, but interrupts and NMIs
2797              * can still come in now. If that happens after
2798              * the above increment, then it will have to go
2799              * back to the old method of allocating the event
2800              * on the ring buffer, and if the filter fails, it
2801              * will have to call ring_buffer_discard_commit()
2802              * to remove it.
2803              *
2804              * Need to also check the unlikely case that the
2805              * length is bigger than the temp buffer size.
2806              * If that happens, then the reserve is pretty much
2807              * guaranteed to fail, as the ring buffer currently
2808              * only allows events less than a page. But that may
2809              * change in the future, so let the ring buffer reserve
2810              * handle the failure in that case.
2811              */
2812             if (val == 1 && likely(len <= max_len)) {
2813                 trace_event_setup(entry, type, trace_ctx);
2814                 entry->array[0] = len;
2815                 /* Return with preemption disabled */
2816                 return entry;
2817             }
2818             this_cpu_dec(trace_buffered_event_cnt);
2819         }
2820         /* __trace_buffer_lock_reserve() disables preemption */
2821         preempt_enable_notrace();
2822     }
2823 
2824     entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2825                         trace_ctx);
2826     /*
2827      * If tracing is off, but we have triggers enabled,
2828      * we still need to look at the event data. Use the temp_buffer
2829      * to store the trace event for the trigger to use. It's recursion
2830      * safe and will not be recorded anywhere.
2831      */
2832     if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2833         *current_rb = temp_buffer;
2834         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2835                             trace_ctx);
2836     }
2837     return entry;
2838 }
2839 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2840 
2841 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2842 static DEFINE_MUTEX(tracepoint_printk_mutex);
2843 
2844 static void output_printk(struct trace_event_buffer *fbuffer)
2845 {
2846     struct trace_event_call *event_call;
2847     struct trace_event_file *file;
2848     struct trace_event *event;
2849     unsigned long flags;
2850     struct trace_iterator *iter = tracepoint_print_iter;
2851 
2852     /* We should never get here if iter is NULL */
2853     if (WARN_ON_ONCE(!iter))
2854         return;
2855 
2856     event_call = fbuffer->trace_file->event_call;
2857     if (!event_call || !event_call->event.funcs ||
2858         !event_call->event.funcs->trace)
2859         return;
2860 
2861     file = fbuffer->trace_file;
2862     if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2863         (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2864          !filter_match_preds(file->filter, fbuffer->entry)))
2865         return;
2866 
2867     event = &fbuffer->trace_file->event_call->event;
2868 
2869     raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2870     trace_seq_init(&iter->seq);
2871     iter->ent = fbuffer->entry;
2872     event_call->event.funcs->trace(iter, 0, event);
2873     trace_seq_putc(&iter->seq, 0);
2874     printk("%s", iter->seq.buffer);
2875 
2876     raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2877 }
2878 
2879 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2880                  void *buffer, size_t *lenp,
2881                  loff_t *ppos)
2882 {
2883     int save_tracepoint_printk;
2884     int ret;
2885 
2886     mutex_lock(&tracepoint_printk_mutex);
2887     save_tracepoint_printk = tracepoint_printk;
2888 
2889     ret = proc_dointvec(table, write, buffer, lenp, ppos);
2890 
2891     /*
2892      * This will force exiting early, as tracepoint_printk
2893      * is always zero when tracepoint_print_iter is not allocated
2894      */
2895     if (!tracepoint_print_iter)
2896         tracepoint_printk = 0;
2897 
2898     if (save_tracepoint_printk == tracepoint_printk)
2899         goto out;
2900 
2901     if (tracepoint_printk)
2902         static_key_enable(&tracepoint_printk_key.key);
2903     else
2904         static_key_disable(&tracepoint_printk_key.key);
2905 
2906  out:
2907     mutex_unlock(&tracepoint_printk_mutex);
2908 
2909     return ret;
2910 }
2911 
2912 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2913 {
2914     enum event_trigger_type tt = ETT_NONE;
2915     struct trace_event_file *file = fbuffer->trace_file;
2916 
2917     if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2918             fbuffer->entry, &tt))
2919         goto discard;
2920 
2921     if (static_key_false(&tracepoint_printk_key.key))
2922         output_printk(fbuffer);
2923 
2924     if (static_branch_unlikely(&trace_event_exports_enabled))
2925         ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2926 
2927     trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2928             fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2929 
2930 discard:
2931     if (tt)
2932         event_triggers_post_call(file, tt);
2933 
2934 }
2935 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2936 
2937 /*
2938  * Skip 3:
2939  *
2940  *   trace_buffer_unlock_commit_regs()
2941  *   trace_event_buffer_commit()
2942  *   trace_event_raw_event_xxx()
2943  */
2944 # define STACK_SKIP 3
2945 
2946 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2947                      struct trace_buffer *buffer,
2948                      struct ring_buffer_event *event,
2949                      unsigned int trace_ctx,
2950                      struct pt_regs *regs)
2951 {
2952     __buffer_unlock_commit(buffer, event);
2953 
2954     /*
2955      * If regs is not set, then skip the necessary functions.
2956      * Note, we can still get here via blktrace, wakeup tracer
2957      * and mmiotrace, but that's ok if they lose a function or
2958      * two. They are not that meaningful.
2959      */
2960     ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2961     ftrace_trace_userstack(tr, buffer, trace_ctx);
2962 }
2963 
2964 /*
2965  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2966  */
2967 void
2968 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2969                    struct ring_buffer_event *event)
2970 {
2971     __buffer_unlock_commit(buffer, event);
2972 }
2973 
2974 void
2975 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2976            parent_ip, unsigned int trace_ctx)
2977 {
2978     struct trace_event_call *call = &event_function;
2979     struct trace_buffer *buffer = tr->array_buffer.buffer;
2980     struct ring_buffer_event *event;
2981     struct ftrace_entry *entry;
2982 
2983     event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2984                         trace_ctx);
2985     if (!event)
2986         return;
2987     entry   = ring_buffer_event_data(event);
2988     entry->ip           = ip;
2989     entry->parent_ip        = parent_ip;
2990 
2991     if (!call_filter_check_discard(call, entry, buffer, event)) {
2992         if (static_branch_unlikely(&trace_function_exports_enabled))
2993             ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2994         __buffer_unlock_commit(buffer, event);
2995     }
2996 }
2997 
2998 #ifdef CONFIG_STACKTRACE
2999 
3000 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3001 #define FTRACE_KSTACK_NESTING   4
3002 
3003 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3004 
3005 struct ftrace_stack {
3006     unsigned long       calls[FTRACE_KSTACK_ENTRIES];
3007 };
3008 
3009 
3010 struct ftrace_stacks {
3011     struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
3012 };
3013 
3014 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3015 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
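/*
 * Each CPU gets FTRACE_KSTACK_NESTING scratch slots so that a stack dump
 * taken from an interrupt or NMI that lands inside __ftrace_trace_stack()
 * does not scribble over one already in progress; ftrace_stack_reserve is
 * the per-CPU index of the slot currently in use.
 */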
3016 
3017 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3018                  unsigned int trace_ctx,
3019                  int skip, struct pt_regs *regs)
3020 {
3021     struct trace_event_call *call = &event_kernel_stack;
3022     struct ring_buffer_event *event;
3023     unsigned int size, nr_entries;
3024     struct ftrace_stack *fstack;
3025     struct stack_entry *entry;
3026     int stackidx;
3027 
3028     /*
3029      * Add one, for this function and the call to stack_trace_save().
3030      * If regs is set, then these functions will not be in the way.
3031      */
3032 #ifndef CONFIG_UNWINDER_ORC
3033     if (!regs)
3034         skip++;
3035 #endif
3036 
3037     preempt_disable_notrace();
3038 
3039     stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3040 
3041     /* This should never happen. If it does, yell once and skip */
3042     if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3043         goto out;
3044 
3045     /*
3046      * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3047      * interrupt will either see the value pre increment or post
3048      * increment. If the interrupt happens pre increment it will have
3049      * restored the counter when it returns.  We just need a barrier to
3050      * keep gcc from moving things around.
3051      */
3052     barrier();
3053 
3054     fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3055     size = ARRAY_SIZE(fstack->calls);
3056 
3057     if (regs) {
3058         nr_entries = stack_trace_save_regs(regs, fstack->calls,
3059                            size, skip);
3060     } else {
3061         nr_entries = stack_trace_save(fstack->calls, size, skip);
3062     }
3063 
3064     size = nr_entries * sizeof(unsigned long);
3065     event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3066                     (sizeof(*entry) - sizeof(entry->caller)) + size,
3067                     trace_ctx);
3068     if (!event)
3069         goto out;
3070     entry = ring_buffer_event_data(event);
3071 
3072     memcpy(&entry->caller, fstack->calls, size);
3073     entry->size = nr_entries;
3074 
3075     if (!call_filter_check_discard(call, entry, buffer, event))
3076         __buffer_unlock_commit(buffer, event);
3077 
3078  out:
3079     /* Again, don't let gcc optimize things here */
3080     barrier();
3081     __this_cpu_dec(ftrace_stack_reserve);
3082     preempt_enable_notrace();
3083 
3084 }
3085 
3086 static inline void ftrace_trace_stack(struct trace_array *tr,
3087                       struct trace_buffer *buffer,
3088                       unsigned int trace_ctx,
3089                       int skip, struct pt_regs *regs)
3090 {
3091     if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3092         return;
3093 
3094     __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3095 }
3096 
3097 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3098            int skip)
3099 {
3100     struct trace_buffer *buffer = tr->array_buffer.buffer;
3101 
3102     if (rcu_is_watching()) {
3103         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3104         return;
3105     }
3106 
3107     /*
3108      * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3109      * but if the above rcu_is_watching() failed, then the NMI
3110      * triggered someplace critical, and ct_irq_enter() should
3111      * not be called from NMI.
3112      */
3113     if (unlikely(in_nmi()))
3114         return;
3115 
3116     ct_irq_enter_irqson();
3117     __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3118     ct_irq_exit_irqson();
3119 }
3120 
3121 /**
3122  * trace_dump_stack - record a stack back trace in the trace buffer
3123  * @skip: Number of functions to skip (helper handlers)
3124  */
3125 void trace_dump_stack(int skip)
3126 {
3127     if (tracing_disabled || tracing_selftest_running)
3128         return;
3129 
3130 #ifndef CONFIG_UNWINDER_ORC
3131     /* Skip 1 to skip this function. */
3132     skip++;
3133 #endif
3134     __ftrace_trace_stack(global_trace.array_buffer.buffer,
3135                  tracing_gen_ctx(), skip, NULL);
3136 }
3137 EXPORT_SYMBOL_GPL(trace_dump_stack);
3138 
3139 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3140 static DEFINE_PER_CPU(int, user_stack_count);
3141 
3142 static void
3143 ftrace_trace_userstack(struct trace_array *tr,
3144                struct trace_buffer *buffer, unsigned int trace_ctx)
3145 {
3146     struct trace_event_call *call = &event_user_stack;
3147     struct ring_buffer_event *event;
3148     struct userstack_entry *entry;
3149 
3150     if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3151         return;
3152 
3153     /*
3154      * NMIs cannot handle page faults, even with fixups.
3155      * Saving the user stack can (and often does) fault.
3156      */
3157     if (unlikely(in_nmi()))
3158         return;
3159 
3160     /*
3161      * prevent recursion, since the user stack tracing may
3162      * trigger other kernel events.
3163      */
3164     preempt_disable();
3165     if (__this_cpu_read(user_stack_count))
3166         goto out;
3167 
3168     __this_cpu_inc(user_stack_count);
3169 
3170     event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3171                         sizeof(*entry), trace_ctx);
3172     if (!event)
3173         goto out_drop_count;
3174     entry   = ring_buffer_event_data(event);
3175 
3176     entry->tgid     = current->tgid;
3177     memset(&entry->caller, 0, sizeof(entry->caller));
3178 
3179     stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3180     if (!call_filter_check_discard(call, entry, buffer, event))
3181         __buffer_unlock_commit(buffer, event);
3182 
3183  out_drop_count:
3184     __this_cpu_dec(user_stack_count);
3185  out:
3186     preempt_enable();
3187 }
3188 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3189 static void ftrace_trace_userstack(struct trace_array *tr,
3190                    struct trace_buffer *buffer,
3191                    unsigned int trace_ctx)
3192 {
3193 }
3194 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3195 
3196 #endif /* CONFIG_STACKTRACE */
3197 
3198 static inline void
3199 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3200               unsigned long long delta)
3201 {
3202     entry->bottom_delta_ts = delta & U32_MAX;
3203     entry->top_delta_ts = (delta >> 32);
3204 }
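/*
 * The helper above splits the 64-bit delta across two 32-bit fields; the
 * full value can be rebuilt as:
 *
 *	delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */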
3205 
3206 void trace_last_func_repeats(struct trace_array *tr,
3207                  struct trace_func_repeats *last_info,
3208                  unsigned int trace_ctx)
3209 {
3210     struct trace_buffer *buffer = tr->array_buffer.buffer;
3211     struct func_repeats_entry *entry;
3212     struct ring_buffer_event *event;
3213     u64 delta;
3214 
3215     event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3216                         sizeof(*entry), trace_ctx);
3217     if (!event)
3218         return;
3219 
3220     delta = ring_buffer_event_time_stamp(buffer, event) -
3221         last_info->ts_last_call;
3222 
3223     entry = ring_buffer_event_data(event);
3224     entry->ip = last_info->ip;
3225     entry->parent_ip = last_info->parent_ip;
3226     entry->count = last_info->count;
3227     func_repeats_set_delta_ts(entry, delta);
3228 
3229     __buffer_unlock_commit(buffer, event);
3230 }
3231 
3232 /* created for use with alloc_percpu */
3233 struct trace_buffer_struct {
3234     int nesting;
3235     char buffer[4][TRACE_BUF_SIZE];
3236 };
3237 
3238 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3239 
3240 /*
3241  * This allows for lockless recording.  If we're nested too deeply, then
3242  * this returns NULL.
3243  */
3244 static char *get_trace_buf(void)
3245 {
3246     struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3247 
3248     if (!trace_percpu_buffer || buffer->nesting >= 4)
3249         return NULL;
3250 
3251     buffer->nesting++;
3252 
3253     /* Interrupts must see nesting incremented before we use the buffer */
3254     barrier();
3255     return &buffer->buffer[buffer->nesting - 1][0];
3256 }
3257 
3258 static void put_trace_buf(void)
3259 {
3260     /* Don't let the decrement of nesting leak before this */
3261     barrier();
3262     this_cpu_dec(trace_percpu_buffer->nesting);
3263 }
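/*
 * get_trace_buf()/put_trace_buf() are always used in pairs with
 * preemption disabled; see trace_vbprintk() and __trace_array_vprintk()
 * below for the canonical pattern: get, format into the buffer, reserve
 * and commit the event, then put.
 */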
3264 
3265 static int alloc_percpu_trace_buffer(void)
3266 {
3267     struct trace_buffer_struct __percpu *buffers;
3268 
3269     if (trace_percpu_buffer)
3270         return 0;
3271 
3272     buffers = alloc_percpu(struct trace_buffer_struct);
3273     if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3274         return -ENOMEM;
3275 
3276     trace_percpu_buffer = buffers;
3277     return 0;
3278 }
3279 
3280 static int buffers_allocated;
3281 
3282 void trace_printk_init_buffers(void)
3283 {
3284     if (buffers_allocated)
3285         return;
3286 
3287     if (alloc_percpu_trace_buffer())
3288         return;
3289 
3290     /* trace_printk() is for debug use only. Don't use it in production. */
3291 
3292     pr_warn("\n");
3293     pr_warn("**********************************************************\n");
3294     pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3295     pr_warn("**                                                      **\n");
3296     pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3297     pr_warn("**                                                      **\n");
3298     pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3299     pr_warn("** unsafe for production use.                           **\n");
3300     pr_warn("**                                                      **\n");
3301     pr_warn("** If you see this message and you are not debugging    **\n");
3302     pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3303     pr_warn("**                                                      **\n");
3304     pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3305     pr_warn("**********************************************************\n");
3306 
3307     /* Expand the buffers to set size */
3308     tracing_update_buffers();
3309 
3310     buffers_allocated = 1;
3311 
3312     /*
3313      * trace_printk_init_buffers() can be called by modules.
3314      * If that happens, then we need to start cmdline recording
3315      * directly here. If the global_trace.buffer is already
3316      * allocated here, then this was called by module code.
3317      */
3318     if (global_trace.array_buffer.buffer)
3319         tracing_start_cmdline_record();
3320 }
3321 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3322 
3323 void trace_printk_start_comm(void)
3324 {
3325     /* Start tracing comms if trace printk is set */
3326     if (!buffers_allocated)
3327         return;
3328     tracing_start_cmdline_record();
3329 }
3330 
3331 static void trace_printk_start_stop_comm(int enabled)
3332 {
3333     if (!buffers_allocated)
3334         return;
3335 
3336     if (enabled)
3337         tracing_start_cmdline_record();
3338     else
3339         tracing_stop_cmdline_record();
3340 }
3341 
3342 /**
3343  * trace_vbprintk - write binary msg to tracing buffer
3344  * @ip:    The address of the caller
3345  * @fmt:   The string format to write to the buffer
3346  * @args:  Arguments for @fmt
3347  */
3348 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3349 {
3350     struct trace_event_call *call = &event_bprint;
3351     struct ring_buffer_event *event;
3352     struct trace_buffer *buffer;
3353     struct trace_array *tr = &global_trace;
3354     struct bprint_entry *entry;
3355     unsigned int trace_ctx;
3356     char *tbuffer;
3357     int len = 0, size;
3358 
3359     if (unlikely(tracing_selftest_running || tracing_disabled))
3360         return 0;
3361 
3362     /* Don't pollute graph traces with trace_vprintk internals */
3363     pause_graph_tracing();
3364 
3365     trace_ctx = tracing_gen_ctx();
3366     preempt_disable_notrace();
3367 
3368     tbuffer = get_trace_buf();
3369     if (!tbuffer) {
3370         len = 0;
3371         goto out_nobuffer;
3372     }
3373 
3374     len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3375 
3376     if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3377         goto out_put;
3378 
3379     size = sizeof(*entry) + sizeof(u32) * len;
3380     buffer = tr->array_buffer.buffer;
3381     ring_buffer_nest_start(buffer);
3382     event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3383                         trace_ctx);
3384     if (!event)
3385         goto out;
3386     entry = ring_buffer_event_data(event);
3387     entry->ip           = ip;
3388     entry->fmt          = fmt;
3389 
3390     memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3391     if (!call_filter_check_discard(call, entry, buffer, event)) {
3392         __buffer_unlock_commit(buffer, event);
3393         ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3394     }
3395 
3396 out:
3397     ring_buffer_nest_end(buffer);
3398 out_put:
3399     put_trace_buf();
3400 
3401 out_nobuffer:
3402     preempt_enable_notrace();
3403     unpause_graph_tracing();
3404 
3405     return len;
3406 }
3407 EXPORT_SYMBOL_GPL(trace_vbprintk);
3408 
3409 __printf(3, 0)
3410 static int
3411 __trace_array_vprintk(struct trace_buffer *buffer,
3412               unsigned long ip, const char *fmt, va_list args)
3413 {
3414     struct trace_event_call *call = &event_print;
3415     struct ring_buffer_event *event;
3416     int len = 0, size;
3417     struct print_entry *entry;
3418     unsigned int trace_ctx;
3419     char *tbuffer;
3420 
3421     if (tracing_disabled || tracing_selftest_running)
3422         return 0;
3423 
3424     /* Don't pollute graph traces with trace_vprintk internals */
3425     pause_graph_tracing();
3426 
3427     trace_ctx = tracing_gen_ctx();
3428     preempt_disable_notrace();
3429 
3430 
3431     tbuffer = get_trace_buf();
3432     if (!tbuffer) {
3433         len = 0;
3434         goto out_nobuffer;
3435     }
3436 
3437     len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3438 
3439     size = sizeof(*entry) + len + 1;
3440     ring_buffer_nest_start(buffer);
3441     event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3442                         trace_ctx);
3443     if (!event)
3444         goto out;
3445     entry = ring_buffer_event_data(event);
3446     entry->ip = ip;
3447 
3448     memcpy(&entry->buf, tbuffer, len + 1);
3449     if (!call_filter_check_discard(call, entry, buffer, event)) {
3450         __buffer_unlock_commit(buffer, event);
3451         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3452     }
3453 
3454 out:
3455     ring_buffer_nest_end(buffer);
3456     put_trace_buf();
3457 
3458 out_nobuffer:
3459     preempt_enable_notrace();
3460     unpause_graph_tracing();
3461 
3462     return len;
3463 }
3464 
3465 __printf(3, 0)
3466 int trace_array_vprintk(struct trace_array *tr,
3467             unsigned long ip, const char *fmt, va_list args)
3468 {
3469     return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3470 }
3471 
3472 /**
3473  * trace_array_printk - Print a message to a specific instance
3474  * @tr: The instance trace_array descriptor
3475  * @ip: The instruction pointer that this is called from.
3476  * @fmt: The format to print (printf format)
3477  *
3478  * If a subsystem sets up its own instance, it has the right to
3479  * printk strings into its tracing instance buffer using this
3480  * function. Note, this function will not write into the top level
3481  * buffer (use trace_printk() for that), as the top level buffer
3482  * should only contain events that can be individually disabled.
3483  * trace_printk() is only meant for debugging a kernel, and should
3484  * never be incorporated into normal use.
3485  *
3486  * trace_array_printk() can be used, as it will not add noise to the
3487  * top level tracing buffer.
3488  *
3489  * Note, trace_array_init_printk() must be called on @tr before this
3490  * can be used.
3491  */
3492 __printf(3, 0)
3493 int trace_array_printk(struct trace_array *tr,
3494                unsigned long ip, const char *fmt, ...)
3495 {
3496     int ret;
3497     va_list ap;
3498 
3499     if (!tr)
3500         return -ENOENT;
3501 
3502     /* This is only allowed for created instances */
3503     if (tr == &global_trace)
3504         return 0;
3505 
3506     if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3507         return 0;
3508 
3509     va_start(ap, fmt);
3510     ret = trace_array_vprintk(tr, ip, fmt, ap);
3511     va_end(ap);
3512     return ret;
3513 }
3514 EXPORT_SYMBOL_GPL(trace_array_printk);
3515 
3516 /**
3517  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3518  * @tr: The trace array to initialize the buffers for
3519  *
3520  * As trace_array_printk() only writes into instances, calls to it are
3521  * OK to leave in the kernel (unlike trace_printk()). This must be
3522  * called before trace_array_printk() can be used on a trace_array.
3523  */
3524 int trace_array_init_printk(struct trace_array *tr)
3525 {
3526     if (!tr)
3527         return -ENOENT;
3528 
3529     /* This is only allowed for created instances */
3530     if (tr == &global_trace)
3531         return -EINVAL;
3532 
3533     return alloc_percpu_trace_buffer();
3534 }
3535 EXPORT_SYMBOL_GPL(trace_array_init_printk);
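
/*
 * Illustrative sketch (not part of the kernel sources): how a subsystem
 * might combine the two helpers above. The instance name "my_subsys",
 * the example values, and the use of trace_array_get_by_name() here are
 * assumptions for illustration; error handling is kept minimal.
 */
#if 0
static void my_subsys_trace_example(void)
{
    struct trace_array *tr;

    /* Create or look up a dedicated instance under tracefs instances/ */
    tr = trace_array_get_by_name("my_subsys");
    if (!tr)
        return;

    /* Allocate the per-CPU printk buffers, then log into the instance */
    if (!trace_array_init_printk(tr))
        trace_array_printk(tr, _THIS_IP_, "probe hit: %d\n", 42);

    trace_array_put(tr);
}
#endif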
3536 
3537 __printf(3, 4)
3538 int trace_array_printk_buf(struct trace_buffer *buffer,
3539                unsigned long ip, const char *fmt, ...)
3540 {
3541     int ret;
3542     va_list ap;
3543 
3544     if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3545         return 0;
3546 
3547     va_start(ap, fmt);
3548     ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3549     va_end(ap);
3550     return ret;
3551 }
3552 
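/* Write a formatted string into the top-level (global_trace) buffer. */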
3553 __printf(2, 0)
3554 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3555 {
3556     return trace_array_vprintk(&global_trace, ip, fmt, args);
3557 }
3558 EXPORT_SYMBOL_GPL(trace_vprintk);
3559 
3560 static void trace_iterator_increment(struct trace_iterator *iter)
3561 {
3562     struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3563 
3564     iter->idx++;
3565     if (buf_iter)
3566         ring_buffer_iter_advance(buf_iter);
3567 }
3568 
3569 static struct trace_entry *
3570 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3571         unsigned long *lost_events)
3572 {
3573     struct ring_buffer_event *event;
3574     struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3575 
3576     if (buf_iter) {
3577         event = ring_buffer_iter_peek(buf_iter, ts);
3578         if (lost_events)
3579             *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3580                 (unsigned long)-1 : 0;
3581     } else {
3582         event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3583                      lost_events);
3584     }
3585 
3586     if (event) {
3587         iter->ent_size = ring_buffer_event_length(event);
3588         return ring_buffer_event_data(event);
3589     }
3590     iter->ent_size = 0;
3591     return NULL;
3592 }
3593 
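/*
 * The ring buffer is per-CPU, so a globally time-ordered view is built by
 * peeking at every CPU's buffer and returning the entry with the smallest
 * timestamp, along with its CPU and any lost-event count.
 */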
3594 static struct trace_entry *
3595 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3596           unsigned long *missing_events, u64 *ent_ts)
3597 {
3598     struct trace_buffer *buffer = iter->array_buffer->buffer;
3599     struct trace_entry *ent, *next = NULL;
3600     unsigned long lost_events = 0, next_lost = 0;
3601     int cpu_file = iter->cpu_file;
3602     u64 next_ts = 0, ts;
3603     int next_cpu = -1;
3604     int next_size = 0;
3605     int cpu;
3606 
3607     /*
3608      * If we are in a per_cpu trace file, don't bother iterating over
3609      * all CPUs; peek directly at that one.
3610      */
3611     if (cpu_file > RING_BUFFER_ALL_CPUS) {
3612         if (ring_buffer_empty_cpu(buffer, cpu_file))
3613             return NULL;
3614         ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3615         if (ent_cpu)
3616             *ent_cpu = cpu_file;
3617 
3618         return ent;
3619     }
3620 
3621     for_each_tracing_cpu(cpu) {
3622 
3623         if (ring_buffer_empty_cpu(buffer, cpu))
3624             continue;
3625 
3626         ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3627 
3628         /*
3629          * Pick the entry with the smallest timestamp:
3630          */
3631         if (ent && (!next || ts < next_ts)) {
3632             next = ent;
3633             next_cpu = cpu;
3634             next_ts = ts;
3635             next_lost = lost_events;
3636             next_size = iter->ent_size;
3637         }
3638     }
3639 
3640     iter->ent_size = next_size;
3641 
3642     if (ent_cpu)
3643         *ent_cpu = next_cpu;
3644 
3645     if (ent_ts)
3646         *ent_ts = next_ts;
3647 
3648     if (missing_events)
3649         *missing_events = next_lost;
3650 
3651     return next;
3652 }
3653 
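/*
 * Fallback format buffer used when it is not safe or possible to allocate
 * one dynamically (e.g. from ftrace_dump()); the code below checks for it
 * before attempting krealloc().
 */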
3654 #define STATIC_FMT_BUF_SIZE 128
3655 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3656 
3657 static char *trace_iter_expand_format(struct trace_iterator *iter)
3658 {
3659     char *tmp;
3660 
3661     /*
3662      * iter->tr is NULL when used with tp_printk, which makes
3663      * this get called where it is not safe to call krealloc().
3664      */
3665     if (!iter->tr || iter->fmt == static_fmt_buf)
3666         return NULL;
3667 
3668     tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3669                GFP_KERNEL);
3670     if (tmp) {
3671         iter->fmt_size += STATIC_FMT_BUF_SIZE;
3672         iter->fmt = tmp;
3673     }
3674 
3675     return tmp;
3676 }
3677 
3678 /* Returns true if the string is safe to dereference from an event */
3679 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3680                bool star, int len)
3681 {
3682     unsigned long addr = (unsigned long)str;
3683     struct trace_event *trace_event;
3684     struct trace_event_call *event;
3685 
3686     /* Ignore strings with no length */
3687     if (star && !len)
3688         return true;
3689 
3690     /* OK if part of the event data */
3691     if ((addr >= (unsigned long)iter->ent) &&
3692         (addr < (unsigned long)iter->ent + iter->ent_size))
3693         return true;
3694 
3695     /* OK if part of the temp seq buffer */
3696     if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3697         (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3698         return true;
3699 
3700     /* Core rodata can not be freed */
3701     if (is_kernel_rodata(addr))
3702         return true;
3703 
3704     if (trace_is_tracepoint_string(str))
3705         return true;
3706 
3707     /*
3708      * Now this could be a module event, referencing core module
3709      * data, which is OK.
3710      */
3711     if (!iter->ent)
3712         return false;
3713 
3714     trace_event = ftrace_find_event(iter->ent->type);
3715     if (!trace_event)
3716         return false;
3717 
3718     event = container_of(trace_event, struct trace_event_call, event);
3719     if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3720         return false;
3721 
3722     /* Would rather have rodata, but this will suffice */
3723     if (within_module_core(addr, event->module))
3724         return true;
3725 
3726     return false;
3727 }
3728 
3729 static const char *show_buffer(struct trace_seq *s)
3730 {
3731     struct seq_buf *seq = &s->seq;
3732 
3733     seq_buf_terminate(seq);
3734 
3735     return seq->buffer;
3736 }
3737 
3738 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3739 
3740 static int test_can_verify_check(const char *fmt, ...)
3741 {
3742     char buf[16];
3743     va_list ap;
3744     int ret;
3745 
3746     /*
3747      * The verifier depends on vsnprintf() modifying the va_list that is
3748      * passed to it, i.e. on the va_list being passed by reference. Some
3749      * architectures (like x86_32) pass it by value, which means that
3750      * vsnprintf() does not modify the caller's va_list, and the verifier
3751      * would then need to be able to understand every conversion that
3752      * vsnprintf() can consume. If the va_list is passed by value, the
3753      * verifier is disabled.
3754      */
3755     va_start(ap, fmt);
3756     vsnprintf(buf, 16, "%d", ap);
3757     ret = va_arg(ap, int);
3758     va_end(ap);
3759 
3760     return ret;
3761 }
3762 
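/*
 * Probe with ("%d %d", 0, 1): vsnprintf() consumes the first integer, so
 * va_arg() afterwards yields 1 when the va_list was advanced (passed by
 * reference) and 0 when it was left untouched (passed by value).
 */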
3763 static void test_can_verify(void)
3764 {
3765     if (!test_can_verify_check("%d %d", 0, 1)) {
3766         pr_info("trace event string verifier disabled\n");
3767         static_branch_inc(&trace_no_verify);
3768     }
3769 }
3770 
3771 /**
3772  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3773  * @iter: The iterator that holds the seq buffer and the event being printed
3774  * @fmt: The format used to print the event
3775  * @ap: The va_list holding the data to print from @fmt.
3776  *
3777  * This writes the data into the @iter->seq buffer using the data from
3778  * @fmt and @ap. If the format has a %s, then the source of the string
3779  * is examined to make sure it is safe to print; otherwise this will
3780  * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
3781  * pointer.
3782  */
3783 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3784              va_list ap)
3785 {
3786     const char *p = fmt;
3787     const char *str;
3788     int i, j;
3789 
3790     if (WARN_ON_ONCE(!fmt))
3791         return;
3792 
3793     if (static_branch_unlikely(&trace_no_verify))
3794         goto print;
3795 
3796     /* Don't bother checking when doing a ftrace_dump() */
3797     if (iter->fmt == static_fmt_buf)
3798         goto print;
3799 
3800     while (*p) {
3801         bool star = false;
3802         int len = 0;
3803 
3804         j = 0;
3805 
3806         /* We only care about %s and variants */
3807         for (i = 0; p[i]; i++) {
3808             if (i + 1 >= iter->fmt_size) {
3809                 /*
3810                  * If we can't expand the copy buffer,
3811                  * just print it.
3812                  */
3813                 if (!trace_iter_expand_format(iter))
3814                     goto print;
3815             }
3816 
3817             if (p[i] == '\\' && p[i+1]) {
3818                 i++;
3819                 continue;
3820             }
3821             if (p[i] == '%') {
3822                 /* Need to test cases like %08.*s */
3823                 for (j = 1; p[i+j]; j++) {
3824                     if (isdigit(p[i+j]) ||
3825                         p[i+j] == '.')
3826                         continue;
3827                     if (p[i+j] == '*') {
3828                         star = true;
3829                         continue;
3830                     }
3831                     break;
3832                 }
3833                 if (p[i+j] == 's')
3834                     break;
3835                 star = false;
3836             }
3837             j = 0;
3838         }
3839         /* If no %s found then just print normally */
3840         if (!p[i])
3841             break;
3842 
3843         /* Copy up to the %s, and print that */
3844         strncpy(iter->fmt, p, i);
3845         iter->fmt[i] = '\0';
3846         trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3847 
3848         /*
3849          * If iter->seq is full, the above call no longer guarantees
3850          * that ap is in sync with fmt processing, and further calls
3851          * to va_arg() can return wrong positional arguments.
3852          *
3853          * Ensure that ap is no longer used in this case.
3854          */
3855         if (iter->seq.full) {
3856             p = "";
3857             break;
3858         }
3859 
3860         if (star)
3861             len = va_arg(ap, int);
3862 
3863         /* The ap now points to the string data of the %s */
3864         str = va_arg(ap, const char *);
3865 
3866         /*
3867          * If you hit this warning, it is likely that the
3868          * trace event in question used %s on a string that
3869          * was saved at the time of the event, but may not be
3870          * around when the trace is read. Use __string(),
3871          * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3872          * instead. See samples/trace_events/trace-events-sample.h
3873          * for reference.
3874          */
3875         if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3876                   "fmt: '%s' current_buffer: '%s'",
3877                   fmt, show_buffer(&iter->seq))) {
3878             int ret;
3879 
3880             /* Try to safely read the string */
3881             if (star) {
3882                 if (len + 1 > iter->fmt_size)
3883                     len = iter->fmt_size - 1;
3884                 if (len < 0)
3885                     len = 0;
3886                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3887                 iter->fmt[len] = 0;
3888                 star = false;
3889             } else {
3890                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3891                                   iter->fmt_size);
3892             }
3893             if (ret < 0)
3894                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3895             else
3896                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3897                          str, iter->fmt);
3898             str = "[UNSAFE-MEMORY]";
3899             strcpy(iter->fmt, "%s");
3900         } else {
3901             strncpy(iter->fmt, p + i, j + 1);
3902             iter->fmt[j+1] = '\0';
3903         }
3904         if (star)
3905             trace_seq_printf(&iter->seq, iter->fmt, len, str);
3906         else
3907             trace_seq_printf(&iter->seq, iter->fmt, str);
3908 
3909         p += i + j + 1;
3910     }
3911  print:
3912     if (*p)
3913         trace_seq_vprintf(&iter->seq, p, ap);
3914 }
3915 
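/*
 * Rewrite "%p" in @fmt to "%px" (when hash-ptr is disabled for @iter->tr)
 * so the real address is printed instead of a hashed value. A literal
 * "%%p" and extended specifiers such as "%pS" are left untouched, since
 * only a '%p' followed by a non-alphanumeric character is converted.
 */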
3916 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3917 {
3918     const char *p, *new_fmt;
3919     char *q;
3920 
3921     if (WARN_ON_ONCE(!fmt))
3922         return fmt;
3923 
3924     if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3925         return fmt;
3926 
3927     p = fmt;
3928     new_fmt = q = iter->fmt;
3929     while (*p) {
3930         if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3931             if (!trace_iter_expand_format(iter))
3932                 return fmt;
3933 
3934             q += iter->fmt - new_fmt;
3935             new_fmt = iter->fmt;
3936         }
3937 
3938         *q++ = *p++;
3939 
3940         /* Replace %p with %px */
3941         if (p[-1] == '%') {
3942             if (p[0] == '%') {
3943                 *q++ = *p++;
3944             } else if (p[0] == 'p' && !isalnum(p[1])) {
3945                 *q++ = *p++;
3946                 *q++ = 'x';
3947             }
3948         }
3949     }
3950     *q = '\0';
3951 
3952     return new_fmt;
3953 }
3954 
3955 #define STATIC_TEMP_BUF_SIZE    128
3956 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3957 
3958 /* Find the next real entry, without updating the iterator itself */
3959 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3960                       int *ent_cpu, u64 *ent_ts)
3961 {
3962     /* __find_next_entry will reset ent_size */
3963     int ent_size = iter->ent_size;
3964     struct trace_entry *entry;
3965 
3966     /*
3967      * If called from ftrace_dump(), then the iter->temp buffer
3968      * will be the static_temp_buf and not created from kmalloc.
3969      * If the entry size is greater than the buffer, we cannot
3970      * save it. Just return NULL in that case. This is only
3971      * used to add markers when two consecutive events' time
3972      * stamps have a large delta. See trace_print_lat_context().
3973      */
3974     if (iter->temp == static_temp_buf &&
3975         STATIC_TEMP_BUF_SIZE < ent_size)
3976         return NULL;
3977 
3978     /*
3979      * The __find_next_entry() may call peek_next_entry(), which may
3980      * call ring_buffer_peek() that may make the contents of iter->ent
3981      * undefined. Need to copy iter->ent now.
3982      */
3983     if (iter->ent && iter->ent != iter->temp) {
3984         if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3985             !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3986             void *temp;
3987             temp = kmalloc(iter->ent_size, GFP_KERNEL);
3988             if (!temp)
3989                 return NULL;
3990             kfree(iter->temp);
3991             iter->temp = temp;
3992             iter->temp_size = iter->ent_size;
3993         }
3994         memcpy(iter->temp, iter->ent, iter->ent_size);
3995         iter->ent = iter->temp;
3996     }
3997     entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3998     /* Put back the original ent_size */
3999     iter->ent_size = ent_size;
4000 
4001     return entry;
4002 }
4003 
4004 /* Find the next real entry, and increment the iterator to the next entry */
4005 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4006 {
4007     iter->ent = __find_next_entry(iter, &iter->cpu,
4008                       &iter->lost_events, &iter->ts);
4009 
4010     if (iter->ent)
4011         trace_iterator_increment(iter);
4012 
4013     return iter->ent ? iter : NULL;
4014 }
4015 
4016 static void trace_consume(struct trace_iterator *iter)
4017 {
4018     ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4019                 &iter->lost_events);
4020 }
4021 
4022 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4023 {
4024     struct trace_iterator *iter = m->private;
4025     int i = (int)*pos;
4026     void *ent;
4027 
4028     WARN_ON_ONCE(iter->leftover);
4029 
4030     (*pos)++;
4031 
4032     /* can't go backwards */
4033     if (iter->idx > i)
4034         return NULL;
4035 
4036     if (iter->idx < 0)
4037         ent = trace_find_next_entry_inc(iter);
4038     else
4039         ent = iter;
4040 
4041     while (ent && iter->idx < i)
4042         ent = trace_find_next_entry_inc(iter);
4043 
4044     iter->pos = *pos;
4045 
4046     return ent;
4047 }
4048 
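/*
 * Reset the ring buffer iterator for @cpu and skip over entries whose
 * timestamp predates the buffer's time_start, recording how many were
 * skipped so the entry counts can be adjusted later.
 */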
4049 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4050 {
4051     struct ring_buffer_iter *buf_iter;
4052     unsigned long entries = 0;
4053     u64 ts;
4054 
4055     per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4056 
4057     buf_iter = trace_buffer_iter(iter, cpu);
4058     if (!buf_iter)
4059         return;
4060 
4061     ring_buffer_iter_reset(buf_iter);
4062 
4063     /*
4064      * With the max latency tracers, it is possible that a reset
4065      * never took place on a CPU. This is evident from the timestamp
4066      * being before the start of the buffer.
4067      */
4068     while (ring_buffer_iter_peek(buf_iter, &ts)) {
4069         if (ts >= iter->array_buffer->time_start)
4070             break;
4071         entries++;
4072         ring_buffer_iter_advance(buf_iter);
4073     }
4074 
4075     per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4076 }
4077 
4078 /*
4079  * The current tracer is copied to avoid taking a global lock
4080  * all around.
4081  */
4082 static void *s_start(struct seq_file *m, loff_t *pos)
4083 {
4084     struct trace_iterator *iter = m->private;
4085     struct trace_array *tr = iter->tr;
4086     int cpu_file = iter->cpu_file;
4087     void *p = NULL;
4088     loff_t l = 0;
4089     int cpu;
4090 
4091     /*
4092      * Copy the tracer to avoid using a global lock all around.
4093      * iter->trace is a copy of current_trace; the name pointers
4094      * may be compared instead of using strcmp(), as iter->trace->name
4095      * will point to the same string as current_trace->name.
4096      */
4097     mutex_lock(&trace_types_lock);
4098     if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
4099         *iter->trace = *tr->current_trace;
4100     mutex_unlock(&trace_types_lock);
4101 
4102 #ifdef CONFIG_TRACER_MAX_TRACE
4103     if (iter->snapshot && iter->trace->use_max_tr)
4104         return ERR_PTR(-EBUSY);
4105 #endif
4106 
4107     if (*pos != iter->pos) {
4108         iter->ent = NULL;
4109         iter->cpu = 0;
4110         iter->idx = -1;
4111 
4112         if (cpu_file == RING_BUFFER_ALL_CPUS) {
4113             for_each_tracing_cpu(cpu)
4114                 tracing_iter_reset(iter, cpu);
4115         } else
4116             tracing_iter_reset(iter, cpu_file);
4117 
4118         iter->leftover = 0;
4119         for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4120             ;
4121 
4122     } else {
4123         /*
4124          * If we overflowed the seq_file before, then we want
4125          * to just reuse the trace_seq buffer again.
4126          */
4127         if (iter->leftover)
4128             p = iter;
4129         else {
4130             l = *pos - 1;
4131             p = s_next(m, p, &l);
4132         }
4133     }
4134 
4135     trace_event_read_lock();
4136     trace_access_lock(cpu_file);
4137     return p;
4138 }
4139 
4140 static void s_stop(struct seq_file *m, void *p)
4141 {
4142     struct trace_iterator *iter = m->private;
4143 
4144 #ifdef CONFIG_TRACER_MAX_TRACE
4145     if (iter->snapshot && iter->trace->use_max_tr)
4146         return;
4147 #endif
4148 
4149     trace_access_unlock(iter->cpu_file);
4150     trace_event_read_unlock();
4151 }
4152 
4153 static void
4154 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4155               unsigned long *entries, int cpu)
4156 {
4157     unsigned long count;
4158 
4159     count = ring_buffer_entries_cpu(buf->buffer, cpu);
4160     /*
4161      * If this buffer has skipped entries, then we hold all
4162      * entries for the trace and we need to ignore the
4163      * ones before the time stamp.
4164      */
4165     if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4166         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4167         /* total is the same as the entries */
4168         *total = count;
4169     } else
4170         *total = count +
4171             ring_buffer_overrun_cpu(buf->buffer, cpu);
4172     *entries = count;
4173 }
4174 
4175 static void
4176 get_total_entries(struct array_buffer *buf,
4177           unsigned long *total, unsigned long *entries)
4178 {
4179     unsigned long t, e;
4180     int cpu;
4181 
4182     *total = 0;
4183     *entries = 0;
4184 
4185     for_each_tracing_cpu(cpu) {
4186         get_total_entries_cpu(buf, &t, &e, cpu);
4187         *total += t;
4188         *entries += e;
4189     }
4190 }
4191 
4192 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4193 {
4194     unsigned long total, entries;
4195 
4196     if (!tr)
4197         tr = &global_trace;
4198 
4199     get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4200 
4201     return entries;
4202 }
4203 
4204 unsigned long trace_total_entries(struct trace_array *tr)
4205 {
4206     unsigned long total, entries;
4207 
4208     if (!tr)
4209         tr = &global_trace;
4210 
4211     get_total_entries(&tr->array_buffer, &total, &entries);
4212 
4213     return entries;
4214 }
4215 
4216 static void print_lat_help_header(struct seq_file *m)
4217 {
4218     seq_puts(m, "#                    _------=> CPU#            \n"
4219             "#                   / _-----=> irqs-off/BH-disabled\n"
4220             "#                  | / _----=> need-resched    \n"
4221             "#                  || / _---=> hardirq/softirq \n"
4222             "#                  ||| / _--=> preempt-depth   \n"
4223             "#                  |||| / _-=> migrate-disable \n"
4224             "#                  ||||| /     delay           \n"
4225             "#  cmd     pid     |||||| time  |   caller     \n"
4226             "#     \\   /        ||||||  \\    |    /       \n");
4227 }
4228 
4229 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4230 {
4231     unsigned long total;
4232     unsigned long entries;
4233 
4234     get_total_entries(buf, &total, &entries);
4235     seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4236            entries, total, num_online_cpus());
4237     seq_puts(m, "#\n");
4238 }
4239 
4240 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4241                    unsigned int flags)
4242 {
4243     bool tgid = flags & TRACE_ITER_RECORD_TGID;
4244 
4245     print_event_info(buf, m);
4246 
4247     seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4248     seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4249 }
4250 
4251 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4252                        unsigned int flags)
4253 {
4254     bool tgid = flags & TRACE_ITER_RECORD_TGID;
4255     static const char space[] = "            ";
4256     int prec = tgid ? 12 : 2;
4257 
4258     print_event_info(buf, m);
4259 
4260     seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4261     seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4262     seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4263     seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4264     seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4265     seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4266     seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4267     seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4268 }
4269 
4270 void
4271 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4272 {
4273     unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4274     struct array_buffer *buf = iter->array_buffer;
4275     struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4276     struct tracer *type = iter->trace;
4277     unsigned long entries;
4278     unsigned long total;
4279     const char *name = type->name;
4280 
4281     get_total_entries(buf, &total, &entries);
4282 
4283     seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4284            name, UTS_RELEASE);
4285     seq_puts(m, "# -----------------------------------"
4286          "---------------------------------\n");
4287     seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4288            " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4289            nsecs_to_usecs(data->saved_latency),
4290            entries,
4291            total,
4292            buf->cpu,
4293            preempt_model_none()      ? "server" :
4294            preempt_model_voluntary() ? "desktop" :
4295            preempt_model_full()      ? "preempt" :
4296            preempt_model_rt()        ? "preempt_rt" :
4297            "unknown",
4298            /* These are reserved for later use */
4299            0, 0, 0, 0);
4300 #ifdef CONFIG_SMP
4301     seq_printf(m, " #P:%d)\n", num_online_cpus());
4302 #else
4303     seq_puts(m, ")\n");
4304 #endif
4305     seq_puts(m, "#    -----------------\n");
4306     seq_printf(m, "#    | task: %.16s-%d "
4307            "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4308            data->comm, data->pid,
4309            from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4310            data->policy, data->rt_priority);
4311     seq_puts(m, "#    -----------------\n");
4312 
4313     if (data->critical_start) {
4314         seq_puts(m, "#  => started at: ");
4315         seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4316         trace_print_seq(m, &iter->seq);
4317         seq_puts(m, "\n#  => ended at:   ");
4318         seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4319         trace_print_seq(m, &iter->seq);
4320         seq_puts(m, "\n#\n");
4321     }
4322 
4323     seq_puts(m, "#\n");
4324 }
4325 
4326 static void test_cpu_buff_start(struct trace_iterator *iter)
4327 {
4328     struct trace_seq *s = &iter->seq;
4329     struct trace_array *tr = iter->tr;
4330 
4331     if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4332         return;
4333 
4334     if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4335         return;
4336 
4337     if (cpumask_available(iter->started) &&
4338         cpumask_test_cpu(iter->cpu, iter->started))
4339         return;
4340 
4341     if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4342         return;
4343 
4344     if (cpumask_available(iter->started))
4345         cpumask_set_cpu(iter->cpu, iter->started);
4346 
4347     /* Don't print started cpu buffer for the first entry of the trace */
4348     if (iter->idx > 1)
4349         trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4350                 iter->cpu);
4351 }
4352 
4353 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4354 {
4355     struct trace_array *tr = iter->tr;
4356     struct trace_seq *s = &iter->seq;
4357     unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4358     struct trace_entry *entry;
4359     struct trace_event *event;
4360 
4361     entry = iter->ent;
4362 
4363     test_cpu_buff_start(iter);
4364 
4365     event = ftrace_find_event(entry->type);
4366 
4367     if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4368         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4369             trace_print_lat_context(iter);
4370         else
4371             trace_print_context(iter);
4372     }
4373 
4374     if (trace_seq_has_overflowed(s))
4375         return TRACE_TYPE_PARTIAL_LINE;
4376 
4377     if (event)
4378         return event->funcs->trace(iter, sym_flags, event);
4379 
4380     trace_seq_printf(s, "Unknown type %d\n", entry->type);
4381 
4382     return trace_handle_return(s);
4383 }
4384 
4385 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4386 {
4387     struct trace_array *tr = iter->tr;
4388     struct trace_seq *s = &iter->seq;
4389     struct trace_entry *entry;
4390     struct trace_event *event;
4391 
4392     entry = iter->ent;
4393 
4394     if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4395         trace_seq_printf(s, "%d %d %llu ",
4396                  entry->pid, iter->cpu, iter->ts);
4397 
4398     if (trace_seq_has_overflowed(s))
4399         return TRACE_TYPE_PARTIAL_LINE;
4400 
4401     event = ftrace_find_event(entry->type);
4402     if (event)
4403         return event->funcs->raw(iter, 0, event);
4404 
4405     trace_seq_printf(s, "%d ?\n", entry->type);
4406 
4407     return trace_handle_return(s);
4408 }
4409 
4410 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4411 {
4412     struct trace_array *tr = iter->tr;
4413     struct trace_seq *s = &iter->seq;
4414     unsigned char newline = '\n';
4415     struct trace_entry *entry;
4416     struct trace_event *event;
4417 
4418     entry = iter->ent;
4419 
4420     if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4421         SEQ_PUT_HEX_FIELD(s, entry->pid);
4422         SEQ_PUT_HEX_FIELD(s, iter->cpu);
4423         SEQ_PUT_HEX_FIELD(s, iter->ts);
4424         if (trace_seq_has_overflowed(s))
4425             return TRACE_TYPE_PARTIAL_LINE;
4426     }
4427 
4428     event = ftrace_find_event(entry->type);
4429     if (event) {
4430         enum print_line_t ret = event->funcs->hex(iter, 0, event);
4431         if (ret != TRACE_TYPE_HANDLED)
4432             return ret;
4433     }
4434 
4435     SEQ_PUT_FIELD(s, newline);
4436 
4437     return trace_handle_return(s);
4438 }
4439 
4440 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4441 {
4442     struct trace_array *tr = iter->tr;
4443     struct trace_seq *s = &iter->seq;
4444     struct trace_entry *entry;
4445     struct trace_event *event;
4446 
4447     entry = iter->ent;
4448 
4449     if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4450         SEQ_PUT_FIELD(s, entry->pid);
4451         SEQ_PUT_FIELD(s, iter->cpu);
4452         SEQ_PUT_FIELD(s, iter->ts);
4453         if (trace_seq_has_overflowed(s))
4454             return TRACE_TYPE_PARTIAL_LINE;
4455     }
4456 
4457     event = ftrace_find_event(entry->type);
4458     return event ? event->funcs->binary(iter, 0, event) :
4459         TRACE_TYPE_HANDLED;
4460 }
4461 
4462 int trace_empty(struct trace_iterator *iter)
4463 {
4464     struct ring_buffer_iter *buf_iter;
4465     int cpu;
4466 
4467     /* If we are looking at one CPU buffer, only check that one */
4468     if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4469         cpu = iter->cpu_file;
4470         buf_iter = trace_buffer_iter(iter, cpu);
4471         if (buf_iter) {
4472             if (!ring_buffer_iter_empty(buf_iter))
4473                 return 0;
4474         } else {
4475             if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4476                 return 0;
4477         }
4478         return 1;
4479     }
4480 
4481     for_each_tracing_cpu(cpu) {
4482         buf_iter = trace_buffer_iter(iter, cpu);
4483         if (buf_iter) {
4484             if (!ring_buffer_iter_empty(buf_iter))
4485                 return 0;
4486         } else {
4487             if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4488                 return 0;
4489         }
4490     }
4491 
4492     return 1;
4493 }
4494 
4495 /*  Called with trace_event_read_lock() held. */
4496 enum print_line_t print_trace_line(struct trace_iterator *iter)
4497 {
4498     struct trace_array *tr = iter->tr;
4499     unsigned long trace_flags = tr->trace_flags;
4500     enum print_line_t ret;
4501 
4502     if (iter->lost_events) {
4503         if (iter->lost_events == (unsigned long)-1)
4504             trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4505                      iter->cpu);
4506         else
4507             trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4508                      iter->cpu, iter->lost_events);
4509         if (trace_seq_has_overflowed(&iter->seq))
4510             return TRACE_TYPE_PARTIAL_LINE;
4511     }
4512 
4513     if (iter->trace && iter->trace->print_line) {
4514         ret = iter->trace->print_line(iter);
4515         if (ret != TRACE_TYPE_UNHANDLED)
4516             return ret;
4517     }
4518 
4519     if (iter->ent->type == TRACE_BPUTS &&
4520             trace_flags & TRACE_ITER_PRINTK &&
4521             trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4522         return trace_print_bputs_msg_only(iter);
4523 
4524     if (iter->ent->type == TRACE_BPRINT &&
4525             trace_flags & TRACE_ITER_PRINTK &&
4526             trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4527         return trace_print_bprintk_msg_only(iter);
4528 
4529     if (iter->ent->type == TRACE_PRINT &&
4530             trace_flags & TRACE_ITER_PRINTK &&
4531             trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4532         return trace_print_printk_msg_only(iter);
4533 
4534     if (trace_flags & TRACE_ITER_BIN)
4535         return print_bin_fmt(iter);
4536 
4537     if (trace_flags & TRACE_ITER_HEX)
4538         return print_hex_fmt(iter);
4539 
4540     if (trace_flags & TRACE_ITER_RAW)
4541         return print_raw_fmt(iter);
4542 
4543     return print_trace_fmt(iter);
4544 }
4545 
4546 void trace_latency_header(struct seq_file *m)
4547 {
4548     struct trace_iterator *iter = m->private;
4549     struct trace_array *tr = iter->tr;
4550 
4551     /* print nothing if the buffers are empty */
4552     if (trace_empty(iter))
4553         return;
4554 
4555     if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4556         print_trace_header(m, iter);
4557 
4558     if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4559         print_lat_help_header(m);
4560 }
4561 
4562 void trace_default_header(struct seq_file *m)
4563 {
4564     struct trace_iterator *iter = m->private;
4565     struct trace_array *tr = iter->tr;
4566     unsigned long trace_flags = tr->trace_flags;
4567 
4568     if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4569         return;
4570 
4571     if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4572         /* print nothing if the buffers are empty */
4573         if (trace_empty(iter))
4574             return;
4575         print_trace_header(m, iter);
4576         if (!(trace_flags & TRACE_ITER_VERBOSE))
4577             print_lat_help_header(m);
4578     } else {
4579         if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4580             if (trace_flags & TRACE_ITER_IRQ_INFO)
4581                 print_func_help_header_irq(iter->array_buffer,
4582                                m, trace_flags);
4583             else
4584                 print_func_help_header(iter->array_buffer, m,
4585                                trace_flags);
4586         }
4587     }
4588 }
4589 
4590 static void test_ftrace_alive(struct seq_file *m)
4591 {
4592     if (!ftrace_is_dead())
4593         return;
4594     seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4595             "#          MAY BE MISSING FUNCTION EVENTS\n");
4596 }
4597 
4598 #ifdef CONFIG_TRACER_MAX_TRACE
4599 static void show_snapshot_main_help(struct seq_file *m)
4600 {
4601     seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4602             "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4603             "#                      Takes a snapshot of the main buffer.\n"
4604             "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4605             "#                      (Doesn't have to be '2' works with any number that\n"
4606             "#                       is not a '0' or '1')\n");
4607 }
4608 
4609 static void show_snapshot_percpu_help(struct seq_file *m)
4610 {
4611     seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4612 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4613     seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4614             "#                      Takes a snapshot of the main buffer for this cpu.\n");
4615 #else
4616     seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4617             "#                     Must use main snapshot file to allocate.\n");
4618 #endif
4619     seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4620             "#                      (Doesn't have to be '2' works with any number that\n"
4621             "#                       is not a '0' or '1')\n");
4622 }
4623 
4624 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4625 {
4626     if (iter->tr->allocated_snapshot)
4627         seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4628     else
4629         seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4630 
4631     seq_puts(m, "# Snapshot commands:\n");
4632     if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4633         show_snapshot_main_help(m);
4634     else
4635         show_snapshot_percpu_help(m);
4636 }
4637 #else
4638 /* Should never be called */
4639 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4640 #endif
4641 
4642 static int s_show(struct seq_file *m, void *v)
4643 {
4644     struct trace_iterator *iter = v;
4645     int ret;
4646 
4647     if (iter->ent == NULL) {
4648         if (iter->tr) {
4649             seq_printf(m, "# tracer: %s\n", iter->trace->name);
4650             seq_puts(m, "#\n");
4651             test_ftrace_alive(m);
4652         }
4653         if (iter->snapshot && trace_empty(iter))
4654             print_snapshot_help(m, iter);
4655         else if (iter->trace && iter->trace->print_header)
4656             iter->trace->print_header(m);
4657         else
4658             trace_default_header(m);
4659 
4660     } else if (iter->leftover) {
4661         /*
4662          * If we filled the seq_file buffer earlier, we
4663          * want to just show it now.
4664          */
4665         ret = trace_print_seq(m, &iter->seq);
4666 
4667         /* ret should this time be zero, but you never know */
4668         iter->leftover = ret;
4669 
4670     } else {
4671         print_trace_line(iter);
4672         ret = trace_print_seq(m, &iter->seq);
4673         /*
4674          * If we overflow the seq_file buffer, then it will
4675          * ask us for this data again at start up.
4676          * Use that instead.
4677          *  ret is 0 if seq_file write succeeded.
4678          *        -1 otherwise.
4679          */
4680         iter->leftover = ret;
4681     }
4682 
4683     return 0;
4684 }
4685 
4686 /*
4687  * Should be used after trace_array_get(); trace_types_lock
4688  * ensures that i_cdev has already been initialized.
4689  */
4690 static inline int tracing_get_cpu(struct inode *inode)
4691 {
4692     if (inode->i_cdev) /* See trace_create_cpu_file() */
4693         return (long)inode->i_cdev - 1;
4694     return RING_BUFFER_ALL_CPUS;
4695 }
4696 
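/*
 * seq_file hooks backing the "trace" file: s_start() takes the needed
 * locks and positions the iterator, s_next() advances it, s_show()
 * formats one entry, and s_stop() drops the locks taken in s_start().
 */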
4697 static const struct seq_operations tracer_seq_ops = {
4698     .start      = s_start,
4699     .next       = s_next,
4700     .stop       = s_stop,
4701     .show       = s_show,
4702 };
4703 
4704 static struct trace_iterator *
4705 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4706 {
4707     struct trace_array *tr = inode->i_private;
4708     struct trace_iterator *iter;
4709     int cpu;
4710 
4711     if (tracing_disabled)
4712         return ERR_PTR(-ENODEV);
4713 
4714     iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4715     if (!iter)
4716         return ERR_PTR(-ENOMEM);
4717 
4718     iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4719                     GFP_KERNEL);
4720     if (!iter->buffer_iter)
4721         goto release;
4722 
4723     /*
4724      * trace_find_next_entry() may need to save off iter->ent.
4725      * It will place it into the iter->temp buffer. As most
4726      * events are less than 128 bytes, allocate a buffer of that size.
4727      * If one is greater, then trace_find_next_entry() will
4728      * allocate a new buffer to adjust for the bigger iter->ent.
4729      * It's not critical if it fails to get allocated here.
4730      */
4731     iter->temp = kmalloc(128, GFP_KERNEL);
4732     if (iter->temp)
4733         iter->temp_size = 128;
4734 
4735     /*
4736      * trace_event_printf() may need to modify the given format
4737      * string to replace %p with %px so that it shows the real address
4738      * instead of a hashed value. However, that is only needed for
4739      * event tracing; other tracers may not need it. Defer the
4740      * allocation until it is needed.
4741      */
4742     iter->fmt = NULL;
4743     iter->fmt_size = 0;
4744 
4745     /*
4746      * We make a copy of the current tracer to avoid concurrent
4747      * changes on it while we are reading.
4748      */
4749     mutex_lock(&trace_types_lock);
4750     iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4751     if (!iter->trace)
4752         goto fail;
4753 
4754     *iter->trace = *tr->current_trace;
4755 
4756     if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4757         goto fail;
4758 
4759     iter->tr = tr;
4760 
4761 #ifdef CONFIG_TRACER_MAX_TRACE
4762     /* Currently only the top directory has a snapshot */
4763     if (tr->current_trace->print_max || snapshot)
4764         iter->array_buffer = &tr->max_buffer;
4765     else
4766 #endif
4767         iter->array_buffer = &tr->array_buffer;
4768     iter->snapshot = snapshot;
4769     iter->pos = -1;
4770     iter->cpu_file = tracing_get_cpu(inode);
4771     mutex_init(&iter->mutex);
4772 
4773     /* Notify the tracer early; before we stop tracing. */
4774     if (iter->trace->open)
4775         iter->trace->open(iter);
4776 
4777     /* Annotate start of buffers if we had overruns */
4778     if (ring_buffer_overruns(iter->array_buffer->buffer))
4779         iter->iter_flags |= TRACE_FILE_ANNOTATE;
4780 
4781     /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4782     if (trace_clocks[tr->clock_id].in_ns)
4783         iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4784 
4785     /*
4786      * If pause-on-trace is enabled, then stop the trace while
4787      * dumping, unless this is the "snapshot" file
4788      */
4789     if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4790         tracing_stop_tr(tr);
4791 
4792     if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4793         for_each_tracing_cpu(cpu) {
4794             iter->buffer_iter[cpu] =
4795                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4796                              cpu, GFP_KERNEL);
4797         }
4798         ring_buffer_read_prepare_sync();
4799         for_each_tracing_cpu(cpu) {
4800             ring_buffer_read_start(iter->buffer_iter[cpu]);
4801             tracing_iter_reset(iter, cpu);
4802         }
4803     } else {
4804         cpu = iter->cpu_file;
4805         iter->buffer_iter[cpu] =
4806             ring_buffer_read_prepare(iter->array_buffer->buffer,
4807                          cpu, GFP_KERNEL);
4808         ring_buffer_read_prepare_sync();
4809         ring_buffer_read_start(iter->buffer_iter[cpu]);
4810         tracing_iter_reset(iter, cpu);
4811     }
4812 
4813     mutex_unlock(&trace_types_lock);
4814 
4815     return iter;
4816 
4817  fail:
4818     mutex_unlock(&trace_types_lock);
4819     kfree(iter->trace);
4820     kfree(iter->temp);
4821     kfree(iter->buffer_iter);
4822 release:
4823     seq_release_private(inode, file);
4824     return ERR_PTR(-ENOMEM);
4825 }
4826 
4827 int tracing_open_generic(struct inode *inode, struct file *filp)
4828 {
4829     int ret;
4830 
4831     ret = tracing_check_open_get_tr(NULL);
4832     if (ret)
4833         return ret;
4834 
4835     filp->private_data = inode->i_private;
4836     return 0;
4837 }
4838 
4839 bool tracing_is_disabled(void)
4840 {
4841     return (tracing_disabled) ? true : false;
4842 }
4843 
4844 /*
4845  * Open and update trace_array ref count.
4846  * Must have the current trace_array passed to it.
4847  */
4848 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4849 {
4850     struct trace_array *tr = inode->i_private;
4851     int ret;
4852 
4853     ret = tracing_check_open_get_tr(tr);
4854     if (ret)
4855         return ret;
4856 
4857     filp->private_data = inode->i_private;
4858 
4859     return 0;
4860 }
4861 
4862 static int tracing_mark_open(struct inode *inode, struct file *filp)
4863 {
4864     stream_open(inode, filp);
4865     return tracing_open_generic_tr(inode, filp);
4866 }
4867 
4868 static int tracing_release(struct inode *inode, struct file *file)
4869 {
4870     struct trace_array *tr = inode->i_private;
4871     struct seq_file *m = file->private_data;
4872     struct trace_iterator *iter;
4873     int cpu;
4874 
4875     if (!(file->f_mode & FMODE_READ)) {
4876         trace_array_put(tr);
4877         return 0;
4878     }
4879 
4880     /* Writes do not use seq_file */
4881     iter = m->private;
4882     mutex_lock(&trace_types_lock);
4883 
4884     for_each_tracing_cpu(cpu) {
4885         if (iter->buffer_iter[cpu])
4886             ring_buffer_read_finish(iter->buffer_iter[cpu]);
4887     }
4888 
4889     if (iter->trace && iter->trace->close)
4890         iter->trace->close(iter);
4891 
4892     if (!iter->snapshot && tr->stop_count)
4893         /* reenable tracing if it was previously enabled */
4894         tracing_start_tr(tr);
4895 
4896     __trace_array_put(tr);
4897 
4898     mutex_unlock(&trace_types_lock);
4899 
4900     mutex_destroy(&iter->mutex);
4901     free_cpumask_var(iter->started);
4902     kfree(iter->fmt);
4903     kfree(iter->temp);
4904     kfree(iter->trace);
4905     kfree(iter->buffer_iter);
4906     seq_release_private(inode, file);
4907 
4908     return 0;
4909 }
4910 
4911 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4912 {
4913     struct trace_array *tr = inode->i_private;
4914 
4915     trace_array_put(tr);
4916     return 0;
4917 }
4918 
4919 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4920 {
4921     struct trace_array *tr = inode->i_private;
4922 
4923     trace_array_put(tr);
4924 
4925     return single_release(inode, file);
4926 }
4927 
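/*
 * Open handler for the "trace" file. Opening with O_TRUNC (e.g. via
 * "echo > trace") clears the buffer; opening for read builds the full
 * iterator via __tracing_open().
 */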
4928 static int tracing_open(struct inode *inode, struct file *file)
4929 {
4930     struct trace_array *tr = inode->i_private;
4931     struct trace_iterator *iter;
4932     int ret;
4933 
4934     ret = tracing_check_open_get_tr(tr);
4935     if (ret)
4936         return ret;
4937 
4938     /* If this file was open for write, then erase contents */
4939     if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4940         int cpu = tracing_get_cpu(inode);
4941         struct array_buffer *trace_buf = &tr->array_buffer;
4942 
4943 #ifdef CONFIG_TRACER_MAX_TRACE
4944         if (tr->current_trace->print_max)
4945             trace_buf = &tr->max_buffer;
4946 #endif
4947 
4948         if (cpu == RING_BUFFER_ALL_CPUS)
4949             tracing_reset_online_cpus(trace_buf);
4950         else
4951             tracing_reset_cpu(trace_buf, cpu);
4952     }
4953 
4954     if (file->f_mode & FMODE_READ) {
4955         iter = __tracing_open(inode, file, false);
4956         if (IS_ERR(iter))
4957             ret = PTR_ERR(iter);
4958         else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4959             iter->iter_flags |= TRACE_FILE_LAT_FMT;
4960     }
4961 
4962     if (ret < 0)
4963         trace_array_put(tr);
4964 
4965     return ret;
4966 }
4967 
4968 /*
4969  * Some tracers are not suitable for instance buffers.
4970  * A tracer is always available for the global array (toplevel),
4971  * and for instances only if it explicitly states that it is.
4972  */
4973 static bool
4974 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4975 {
4976     return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4977 }
4978 
4979 /* Find the next tracer that this trace array may use */
4980 static struct tracer *
4981 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4982 {
4983     while (t && !trace_ok_for_array(t, tr))
4984         t = t->next;
4985 
4986     return t;
4987 }
4988 
4989 static void *
4990 t_next(struct seq_file *m, void *v, loff_t *pos)
4991 {
4992     struct trace_array *tr = m->private;
4993     struct tracer *t = v;
4994 
4995     (*pos)++;
4996 
4997     if (t)
4998         t = get_tracer_for_array(tr, t->next);
4999 
5000     return t;
5001 }
5002 
5003 static void *t_start(struct seq_file *m, loff_t *pos)
5004 {
5005     struct trace_array *tr = m->private;
5006     struct tracer *t;
5007     loff_t l = 0;
5008 
5009     mutex_lock(&trace_types_lock);
5010 
5011     t = get_tracer_for_array(tr, trace_types);
5012     for (; t && l < *pos; t = t_next(m, t, &l))
5013             ;
5014 
5015     return t;
5016 }
5017 
5018 static void t_stop(struct seq_file *m, void *p)
5019 {
5020     mutex_unlock(&trace_types_lock);
5021 }
5022 
5023 static int t_show(struct seq_file *m, void *v)
5024 {
5025     struct tracer *t = v;
5026 
5027     if (!t)
5028         return 0;
5029 
5030     seq_puts(m, t->name);
5031     if (t->next)
5032         seq_putc(m, ' ');
5033     else
5034         seq_putc(m, '\n');
5035 
5036     return 0;
5037 }
5038 
5039 static const struct seq_operations show_traces_seq_ops = {
5040     .start      = t_start,
5041     .next       = t_next,
5042     .stop       = t_stop,
5043     .show       = t_show,
5044 };
5045 
5046 static int show_traces_open(struct inode *inode, struct file *file)
5047 {
5048     struct trace_array *tr = inode->i_private;
5049     struct seq_file *m;
5050     int ret;
5051 
5052     ret = tracing_check_open_get_tr(tr);
5053     if (ret)
5054         return ret;
5055 
5056     ret = seq_open(file, &show_traces_seq_ops);
5057     if (ret) {
5058         trace_array_put(tr);
5059         return ret;
5060     }
5061 
5062     m = file->private_data;
5063     m->private = tr;
5064 
5065     return 0;
5066 }
5067 
5068 static int show_traces_release(struct inode *inode, struct file *file)
5069 {
5070     struct trace_array *tr = inode->i_private;
5071 
5072     trace_array_put(tr);
5073     return seq_release(inode, file);
5074 }
5075 
5076 static ssize_t
5077 tracing_write_stub(struct file *filp, const char __user *ubuf,
5078            size_t count, loff_t *ppos)
5079 {
5080     return count;
5081 }
5082 
5083 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5084 {
5085     int ret;
5086 
5087     if (file->f_mode & FMODE_READ)
5088         ret = seq_lseek(file, offset, whence);
5089     else
5090         file->f_pos = ret = 0;
5091 
5092     return ret;
5093 }
5094 
5095 static const struct file_operations tracing_fops = {
5096     .open       = tracing_open,
5097     .read       = seq_read,
5098     .write      = tracing_write_stub,
5099     .llseek     = tracing_lseek,
5100     .release    = tracing_release,
5101 };
5102 
5103 static const struct file_operations show_traces_fops = {
5104     .open       = show_traces_open,
5105     .read       = seq_read,
5106     .llseek     = seq_lseek,
5107     .release    = show_traces_release,
5108 };
5109 
5110 static ssize_t
5111 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5112              size_t count, loff_t *ppos)
5113 {
5114     struct trace_array *tr = file_inode(filp)->i_private;
5115     char *mask_str;
5116     int len;
5117 
5118     len = snprintf(NULL, 0, "%*pb\n",
5119                cpumask_pr_args(tr->tracing_cpumask)) + 1;
5120     mask_str = kmalloc(len, GFP_KERNEL);
5121     if (!mask_str)
5122         return -ENOMEM;
5123 
5124     len = snprintf(mask_str, len, "%*pb\n",
5125                cpumask_pr_args(tr->tracing_cpumask));
5126     if (len >= count) {
5127         count = -EINVAL;
5128         goto out_err;
5129     }
5130     count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5131 
5132 out_err:
5133     kfree(mask_str);
5134 
5135     return count;
5136 }
5137 
5138 int tracing_set_cpumask(struct trace_array *tr,
5139             cpumask_var_t tracing_cpumask_new)
5140 {
5141     int cpu;
5142 
5143     if (!tr)
5144         return -EINVAL;
5145 
5146     local_irq_disable();
5147     arch_spin_lock(&tr->max_lock);
5148     for_each_tracing_cpu(cpu) {
5149         /*
5150          * Increase/decrease the disabled counter if we are
5151          * about to flip a bit in the cpumask:
5152          */
5153         if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5154                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5155             atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5156             ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5157         }
5158         if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5159                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5160             atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5161             ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5162         }
5163     }
5164     arch_spin_unlock(&tr->max_lock);
5165     local_irq_enable();
5166 
5167     cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5168 
5169     return 0;
5170 }
5171 
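/*
 * Write handler for the "tracing_cpumask" file. The mask is given in the
 * standard hex cpumask format, e.g. "echo 3 > tracing_cpumask" limits
 * tracing to CPUs 0 and 1.
 */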
5172 static ssize_t
5173 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5174               size_t count, loff_t *ppos)
5175 {
5176     struct trace_array *tr = file_inode(filp)->i_private;
5177     cpumask_var_t tracing_cpumask_new;
5178     int err;
5179 
5180     if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5181         return -ENOMEM;
5182 
5183     err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5184     if (err)
5185         goto err_free;
5186 
5187     err = tracing_set_cpumask(tr, tracing_cpumask_new);
5188     if (err)
5189         goto err_free;
5190 
5191     free_cpumask_var(tracing_cpumask_new);
5192 
5193     return count;
5194 
5195 err_free:
5196     free_cpumask_var(tracing_cpumask_new);
5197 
5198     return err;
5199 }
5200 
5201 static const struct file_operations tracing_cpumask_fops = {
5202     .open       = tracing_open_generic_tr,
5203     .read       = tracing_cpumask_read,
5204     .write      = tracing_cpumask_write,
5205     .release    = tracing_release_generic_tr,
5206     .llseek     = generic_file_llseek,
5207 };
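
/*
 * Editor's illustrative sketch, not part of this file: driving the
 * tracing_cpumask file served by the handlers above from userspace.
 * The tracefs mount point (/sys/kernel/tracing) and the mask value are
 * assumptions; cpumask_parse_user() expects a hex CPU mask.
 */
#include <stdio.h>

int main(void)
{
    char mask[128] = "";
    FILE *f;

    /* Limit tracing to CPUs 0 and 1 (hex mask 0x3). */
    f = fopen("/sys/kernel/tracing/tracing_cpumask", "w");
    if (!f) {
        perror("tracing_cpumask");
        return 1;
    }
    fputs("3", f);
    fclose(f);

    /* Read the mask back; tracing_cpumask_read() formats it with "%*pb". */
    f = fopen("/sys/kernel/tracing/tracing_cpumask", "r");
    if (f && fgets(mask, sizeof(mask), f))
        printf("tracing_cpumask: %s", mask);
    if (f)
        fclose(f);
    return 0;
}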
5208 
5209 static int tracing_trace_options_show(struct seq_file *m, void *v)
5210 {
5211     struct tracer_opt *trace_opts;
5212     struct trace_array *tr = m->private;
5213     u32 tracer_flags;
5214     int i;
5215 
5216     mutex_lock(&trace_types_lock);
5217     tracer_flags = tr->current_trace->flags->val;
5218     trace_opts = tr->current_trace->flags->opts;
5219 
5220     for (i = 0; trace_options[i]; i++) {
5221         if (tr->trace_flags & (1 << i))
5222             seq_printf(m, "%s\n", trace_options[i]);
5223         else
5224             seq_printf(m, "no%s\n", trace_options[i]);
5225     }
5226 
5227     for (i = 0; trace_opts[i].name; i++) {
5228         if (tracer_flags & trace_opts[i].bit)
5229             seq_printf(m, "%s\n", trace_opts[i].name);
5230         else
5231             seq_printf(m, "no%s\n", trace_opts[i].name);
5232     }
5233     mutex_unlock(&trace_types_lock);
5234 
5235     return 0;
5236 }
5237 
5238 static int __set_tracer_option(struct trace_array *tr,
5239                    struct tracer_flags *tracer_flags,
5240                    struct tracer_opt *opts, int neg)
5241 {
5242     struct tracer *trace = tracer_flags->trace;
5243     int ret;
5244 
5245     ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5246     if (ret)
5247         return ret;
5248 
5249     if (neg)
5250         tracer_flags->val &= ~opts->bit;
5251     else
5252         tracer_flags->val |= opts->bit;
5253     return 0;
5254 }
5255 
5256 /* Try to assign a tracer specific option */
5257 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5258 {
5259     struct tracer *trace = tr->current_trace;
5260     struct tracer_flags *tracer_flags = trace->flags;
5261     struct tracer_opt *opts = NULL;
5262     int i;
5263 
5264     for (i = 0; tracer_flags->opts[i].name; i++) {
5265         opts = &tracer_flags->opts[i];
5266 
5267         if (strcmp(cmp, opts->name) == 0)
5268             return __set_tracer_option(tr, trace->flags, opts, neg);
5269     }
5270 
5271     return -EINVAL;
5272 }
5273 
5274 /* Some tracers require overwrite to stay enabled */
5275 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5276 {
5277     if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5278         return -1;
5279 
5280     return 0;
5281 }
5282 
5283 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5284 {
5285     int *map;
5286 
5287     if ((mask == TRACE_ITER_RECORD_TGID) ||
5288         (mask == TRACE_ITER_RECORD_CMD))
5289         lockdep_assert_held(&event_mutex);
5290 
5291     /* do nothing if flag is already set */
5292     if (!!(tr->trace_flags & mask) == !!enabled)
5293         return 0;
5294 
5295     /* Give the tracer a chance to approve the change */
5296     if (tr->current_trace->flag_changed)
5297         if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5298             return -EINVAL;
5299 
5300     if (enabled)
5301         tr->trace_flags |= mask;
5302     else
5303         tr->trace_flags &= ~mask;
5304 
5305     if (mask == TRACE_ITER_RECORD_CMD)
5306         trace_event_enable_cmd_record(enabled);
5307 
5308     if (mask == TRACE_ITER_RECORD_TGID) {
5309         if (!tgid_map) {
5310             tgid_map_max = pid_max;
5311             map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5312                        GFP_KERNEL);
5313 
5314             /*
5315              * Pairs with smp_load_acquire() in
5316              * trace_find_tgid_ptr() to ensure that if it observes
5317              * the tgid_map we just allocated then it also observes
5318              * the corresponding tgid_map_max value.
5319              */
5320             smp_store_release(&tgid_map, map);
5321         }
5322         if (!tgid_map) {
5323             tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5324             return -ENOMEM;
5325         }
5326 
5327         trace_event_enable_tgid_record(enabled);
5328     }
5329 
5330     if (mask == TRACE_ITER_EVENT_FORK)
5331         trace_event_follow_fork(tr, enabled);
5332 
5333     if (mask == TRACE_ITER_FUNC_FORK)
5334         ftrace_pid_follow_fork(tr, enabled);
5335 
5336     if (mask == TRACE_ITER_OVERWRITE) {
5337         ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5338 #ifdef CONFIG_TRACER_MAX_TRACE
5339         ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5340 #endif
5341     }
5342 
5343     if (mask == TRACE_ITER_PRINTK) {
5344         trace_printk_start_stop_comm(enabled);
5345         trace_printk_control(enabled);
5346     }
5347 
5348     return 0;
5349 }
5350 
5351 int trace_set_options(struct trace_array *tr, char *option)
5352 {
5353     char *cmp;
5354     int neg = 0;
5355     int ret;
5356     size_t orig_len = strlen(option);
5357     int len;
5358 
5359     cmp = strstrip(option);
5360 
5361     len = str_has_prefix(cmp, "no");
5362     if (len)
5363         neg = 1;
5364 
5365     cmp += len;
5366 
5367     mutex_lock(&event_mutex);
5368     mutex_lock(&trace_types_lock);
5369 
5370     ret = match_string(trace_options, -1, cmp);
5371     /* If no generic option matched, try the tracer-specific options */
5372     if (ret < 0)
5373         ret = set_tracer_option(tr, cmp, neg);
5374     else
5375         ret = set_tracer_flag(tr, 1 << ret, !neg);
5376 
5377     mutex_unlock(&trace_types_lock);
5378     mutex_unlock(&event_mutex);
5379 
5380     /*
5381      * If the first trailing whitespace is replaced with '\0' by strstrip,
5382      * turn it back into a space.
5383      */
5384     if (orig_len > strlen(option))
5385         option[strlen(option)] = ' ';
5386 
5387     return ret;
5388 }
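
/*
 * Editor's illustrative sketch, not part of this file: toggling flags
 * through the trace_options file, which is parsed by trace_set_options()
 * above.  A "no" prefix clears a flag; names that match no generic option
 * are tried as tracer-specific options.  The tracefs path and the
 * "sym-offset" option name are assumptions for the example.
 */
#include <stdio.h>

static int set_option(const char *opt)
{
    FILE *f = fopen("/sys/kernel/tracing/trace_options", "w");

    if (!f) {
        perror("trace_options");
        return -1;
    }
    fputs(opt, f);
    return fclose(f);
}

int main(void)
{
    set_option("sym-offset");   /* set the flag   */
    set_option("nosym-offset"); /* clear it again */
    return 0;
}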
5389 
5390 static void __init apply_trace_boot_options(void)
5391 {
5392     char *buf = trace_boot_options_buf;
5393     char *option;
5394 
5395     while (true) {
5396         option = strsep(&buf, ",");
5397 
5398         if (!option)
5399             break;
5400 
5401         if (*option)
5402             trace_set_options(&global_trace, option);
5403 
5404         /* Put back the comma to allow this to be called again */
5405         if (buf)
5406             *(buf - 1) = ',';
5407     }
5408 }
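
/*
 * Editor's illustrative sketch, not part of this file: the parsing idiom
 * used above, in plain userspace C.  strsep() NUL-terminates each token,
 * and the comma is written back so the same buffer can be walked again on
 * a later pass.  The option names are arbitrary examples.
 */
#define _DEFAULT_SOURCE     /* for strsep() in glibc */
#include <stdio.h>
#include <string.h>

static void handle(const char *opt)
{
    printf("option: %s\n", opt);
}

static void apply_options(char *buf)
{
    char *option;

    while (1) {
        option = strsep(&buf, ",");
        if (!option)
            break;
        if (*option)
            handle(option);
        /* Put the comma back so the buffer stays intact. */
        if (buf)
            *(buf - 1) = ',';
    }
}

int main(void)
{
    char opts[] = "print-parent,nosym-offset,stacktrace";

    apply_options(opts);    /* first pass */
    apply_options(opts);    /* second pass still sees the full string */
    return 0;
}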
5409 
5410 static ssize_t
5411 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5412             size_t cnt, loff_t *ppos)
5413 {
5414     struct seq_file *m = filp->private_data;
5415     struct trace_array *tr = m->private;
5416     char buf[64];
5417     int ret;
5418 
5419     if (cnt >= sizeof(buf))
5420         return -EINVAL;
5421 
5422     if (copy_from_user(buf, ubuf, cnt))
5423         return -EFAULT;
5424 
5425     buf[cnt] = 0;
5426 
5427     ret = trace_set_options(tr, buf);
5428     if (ret < 0)
5429         return ret;
5430 
5431     *ppos += cnt;
5432 
5433     return cnt;
5434 }
5435 
5436 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5437 {
5438     struct trace_array *tr = inode->i_private;
5439     int ret;
5440 
5441     ret = tracing_check_open_get_tr(tr);
5442     if (ret)
5443         return ret;
5444 
5445     ret = single_open(file, tracing_trace_options_show, inode->i_private);
5446     if (ret < 0)
5447         trace_array_put(tr);
5448 
5449     return ret;
5450 }
5451 
5452 static const struct file_operations tracing_iter_fops = {
5453     .open       = tracing_trace_options_open,
5454     .read       = seq_read,
5455     .llseek     = seq_lseek,
5456     .release    = tracing_single_release_tr,
5457     .write      = tracing_trace_options_write,
5458 };
5459 
5460 static const char readme_msg[] =
5461     "tracing mini-HOWTO:\n\n"
5462     "# echo 0 > tracing_on : quick way to disable tracing\n"
5463     "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5464     " Important files:\n"
5465     "  trace\t\t\t- The static contents of the buffer\n"
5466     "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5467     "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5468     "  current_tracer\t- function and latency tracers\n"
5469     "  available_tracers\t- list of configured tracers for current_tracer\n"
5470     "  error_log\t- error log for failed commands (that support it)\n"
5471     "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5472     "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5473     "  trace_clock\t\t- change the clock used to order events\n"
5474     "       local:   Per cpu clock but may not be synced across CPUs\n"
5475     "      global:   Synced across CPUs but slows tracing down.\n"
5476     "     counter:   Not a clock, but just an increment\n"
5477     "      uptime:   Jiffy counter from time of boot\n"
5478     "        perf:   Same clock that perf events use\n"
5479 #ifdef CONFIG_X86_64
5480     "     x86-tsc:   TSC cycle counter\n"
5481 #endif
5482     "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5483     "       delta:   Delta difference against a buffer-wide timestamp\n"
5484     "    absolute:   Absolute (standalone) timestamp\n"
5485     "\n  trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
5486     "\n  trace_marker_raw\t\t- Writing into this file writes binary data into the kernel buffer\n"
5487     "  tracing_cpumask\t- Limit which CPUs to trace\n"
5488     "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5489     "\t\t\t  Remove sub-buffer with rmdir\n"
5490     "  trace_options\t\t- Set format or modify how tracing happens\n"
5491     "\t\t\t  Disable an option by prefixing 'no' to the\n"
5492     "\t\t\t  option name\n"
5493     "  saved_cmdlines_size\t- echo the number of entries to store in the comm-pid list\n"
5494 #ifdef CONFIG_DYNAMIC_FTRACE
5495     "\n  available_filter_functions - list of functions that can be filtered on\n"
5496     "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5497     "\t\t\t  functions\n"
5498     "\t     accepts: func_full_name or glob-matching-pattern\n"
5499     "\t     modules: Can select a group via module\n"
5500     "\t      Format: :mod:<module-name>\n"
5501     "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5502     "\t    triggers: a command to perform when function is hit\n"
5503     "\t      Format: <function>:<trigger>[:count]\n"
5504     "\t     trigger: traceon, traceoff\n"
5505     "\t\t      enable_event:<system>:<event>\n"
5506     "\t\t      disable_event:<system>:<event>\n"
5507 #ifdef CONFIG_STACKTRACE
5508     "\t\t      stacktrace\n"
5509 #endif
5510 #ifdef CONFIG_TRACER_SNAPSHOT
5511     "\t\t      snapshot\n"
5512 #endif
5513     "\t\t      dump\n"
5514     "\t\t      cpudump\n"
5515     "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5516     "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5517     "\t     The first one will disable tracing every time do_fault is hit\n"
5518     "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5519     "\t       The first time do_trap is hit and it disables tracing, the\n"
5520     "\t       counter will decrement to 2. If tracing is already disabled,\n"
5521     "\t       the counter will not decrement. It only decrements when the\n"
5522     "\t       trigger did work\n"
5523     "\t     To remove trigger without count:\n"
5524     "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5525     "\t     To remove trigger with a count:\n"
5526     "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5527     "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5528     "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5529     "\t    modules: Can select a group via module command :mod:\n"
5530     "\t    Does not accept triggers\n"
5531 #endif /* CONFIG_DYNAMIC_FTRACE */
5532 #ifdef CONFIG_FUNCTION_TRACER
5533     "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5534     "\t\t    (function)\n"
5535     "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5536     "\t\t    (function)\n"
5537 #endif
5538 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5539     "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5540     "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5541     "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5542 #endif
5543 #ifdef CONFIG_TRACER_SNAPSHOT
5544     "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5545     "\t\t\t  snapshot buffer. Read the contents for more\n"
5546     "\t\t\t  information\n"
5547 #endif
5548 #ifdef CONFIG_STACK_TRACER
5549     "  stack_trace\t\t- Shows the max stack trace when active\n"
5550     "  stack_max_size\t- Shows current max stack size that was traced\n"
5551     "\t\t\t  Write into this file to reset the max size (trigger a\n"
5552     "\t\t\t  new trace)\n"
5553 #ifdef CONFIG_DYNAMIC_FTRACE
5554     "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5555     "\t\t\t  traces\n"
5556 #endif
5557 #endif /* CONFIG_STACK_TRACER */
5558 #ifdef CONFIG_DYNAMIC_EVENTS
5559     "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5560     "\t\t\t  Write into this file to define/undefine new trace events.\n"
5561 #endif
5562 #ifdef CONFIG_KPROBE_EVENTS
5563     "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5564     "\t\t\t  Write into this file to define/undefine new trace events.\n"
5565 #endif
5566 #ifdef CONFIG_UPROBE_EVENTS
5567     "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5568     "\t\t\t  Write into this file to define/undefine new trace events.\n"
5569 #endif
5570 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5571     "\t  accepts: event-definitions (one definition per line)\n"
5572     "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5573     "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5574 #ifdef CONFIG_HIST_TRIGGERS
5575     "\t           s:[synthetic/]<event> <field> [<field>]\n"
5576 #endif
5577     "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]\n"
5578     "\t           -:[<group>/][<event>]\n"
5579 #ifdef CONFIG_KPROBE_EVENTS
5580     "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5581   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5582 #endif
5583 #ifdef CONFIG_UPROBE_EVENTS
5584   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5585 #endif
5586     "\t     args: <name>=fetcharg[:type]\n"
5587     "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5588 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5589     "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5590 #else
5591     "\t           $stack<index>, $stack, $retval, $comm,\n"
5592 #endif
5593     "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5594     "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5595     "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5596     "\t           <type>\\[<array-size>\\]\n"
5597 #ifdef CONFIG_HIST_TRIGGERS
5598     "\t    field: <stype> <name>;\n"
5599     "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5600     "\t           [unsigned] char/int/long\n"
5601 #endif
5602     "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5603     "\t            of the <attached-group>/<attached-event>.\n"
5604 #endif
5605     "  events/\t\t- Directory containing all trace event subsystems:\n"
5606     "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5607     "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5608     "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5609     "\t\t\t  events\n"
5610     "      filter\t\t- If set, only events passing filter are traced\n"
5611     "  events/<system>/<event>/\t- Directory containing control files for\n"
5612     "\t\t\t  <event>:\n"
5613     "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5614     "      filter\t\t- If set, only events passing filter are traced\n"
5615     "      trigger\t\t- If set, a command to perform when event is hit\n"
5616     "\t    Format: <trigger>[:count][if <filter>]\n"
5617     "\t   trigger: traceon, traceoff\n"
5618     "\t            enable_event:<system>:<event>\n"
5619     "\t            disable_event:<system>:<event>\n"
5620 #ifdef CONFIG_HIST_TRIGGERS
5621     "\t            enable_hist:<system>:<event>\n"
5622     "\t            disable_hist:<system>:<event>\n"
5623 #endif
5624 #ifdef CONFIG_STACKTRACE
5625     "\t\t    stacktrace\n"
5626 #endif
5627 #ifdef CONFIG_TRACER_SNAPSHOT
5628     "\t\t    snapshot\n"
5629 #endif
5630 #ifdef CONFIG_HIST_TRIGGERS
5631     "\t\t    hist (see below)\n"
5632 #endif
5633     "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5634     "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5635     "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5636     "\t                  events/block/block_unplug/trigger\n"
5637     "\t   The first disables tracing every time block_unplug is hit.\n"
5638     "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5639     "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5640     "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5641     "\t   Like function triggers, the counter is only decremented if it\n"
5642     "\t    enabled or disabled tracing.\n"
5643     "\t   To remove a trigger without a count:\n"
5644     "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5645     "\t   To remove a trigger with a count:\n"
5646     "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5647     "\t   Filters can be ignored when removing a trigger.\n"
5648 #ifdef CONFIG_HIST_TRIGGERS
5649     "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5650     "\t    Format: hist:keys=<field1[,field2,...]>\n"
5651     "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5652     "\t            [:values=<field1[,field2,...]>]\n"
5653     "\t            [:sort=<field1[,field2,...]>]\n"
5654     "\t            [:size=#entries]\n"
5655     "\t            [:pause][:continue][:clear]\n"
5656     "\t            [:name=histname1]\n"
5657     "\t            [:<handler>.<action>]\n"
5658     "\t            [if <filter>]\n\n"
5659     "\t    Note, special fields can be used as well:\n"
5660     "\t            common_timestamp - to record current timestamp\n"
5661     "\t            common_cpu - to record the CPU the event happened on\n"
5662     "\n"
5663     "\t    A hist trigger variable can be:\n"
5664     "\t        - a reference to a field e.g. x=common_timestamp,\n"
5665     "\t        - a reference to another variable e.g. y=$x,\n"
5666     "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5667     "\t        - an arithmetic expression: e.g. time_secs=common_timestamp/1000\n"
5668     "\n"
5669     "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5670     "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5671     "\t    variable reference, field or numeric literal.\n"
5672     "\n"
5673     "\t    When a matching event is hit, an entry is added to a hash\n"
5674     "\t    table using the key(s) and value(s) named, and the value of a\n"
5675     "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5676     "\t    correspond to fields in the event's format description.  Keys\n"
5677     "\t    can be any field, or the special string 'stacktrace'.\n"
5678     "\t    Compound keys consisting of up to two fields can be specified\n"
5679     "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5680     "\t    fields.  Sort keys consisting of up to two fields can be\n"
5681     "\t    specified using the 'sort' keyword.  The sort direction can\n"
5682     "\t    be modified by appending '.descending' or '.ascending' to a\n"
5683     "\t    sort field.  The 'size' parameter can be used to specify more\n"
5684     "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5685     "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5686     "\t    its histogram data will be shared with other triggers of the\n"
5687     "\t    same name, and trigger hits will update this common data.\n\n"
5688     "\t    Reading the 'hist' file for the event will dump the hash\n"
5689     "\t    table in its entirety to stdout.  If there are multiple hist\n"
5690     "\t    triggers attached to an event, there will be a table for each\n"
5691     "\t    trigger in the output.  The table displayed for a named\n"
5692     "\t    trigger will be the same as any other instance having the\n"
5693     "\t    same name.  The default format used to display a given field\n"
5694     "\t    can be modified by appending any of the following modifiers\n"
5695     "\t    to the field name, as applicable:\n\n"
5696     "\t            .hex        display a number as a hex value\n"
5697     "\t            .sym        display an address as a symbol\n"
5698     "\t            .sym-offset display an address as a symbol and offset\n"
5699     "\t            .execname   display a common_pid as a program name\n"
5700     "\t            .syscall    display a syscall id as a syscall name\n"
5701     "\t            .log2       display log2 value rather than raw number\n"
5702     "\t            .buckets=size  display values in groups of size rather than raw number\n"
5703     "\t            .usecs      display a common_timestamp in microseconds\n\n"
5704     "\t    The 'pause' parameter can be used to pause an existing hist\n"
5705     "\t    trigger or to start a hist trigger but not log any events\n"
5706     "\t    until told to do so.  'continue' can be used to start or\n"
5707     "\t    restart a paused hist trigger.\n\n"
5708     "\t    The 'clear' parameter will clear the contents of a running\n"
5709     "\t    hist trigger and leave its current paused/active state\n"
5710     "\t    unchanged.\n\n"
5711     "\t    The enable_hist and disable_hist triggers can be used to\n"
5712     "\t    have one event conditionally start and stop another event's\n"
5713     "\t    already-attached hist trigger.  The syntax is analogous to\n"
5714     "\t    the enable_event and disable_event triggers.\n\n"
5715     "\t    Hist trigger handlers and actions are executed whenever a\n"
5716     "\t    histogram entry is added or updated.  They take the form:\n\n"
5717     "\t        <handler>.<action>\n\n"
5718     "\t    The available handlers are:\n\n"
5719     "\t        onmatch(matching.event)  - invoke on addition or update\n"
5720     "\t        onmax(var)               - invoke if var exceeds current max\n"
5721     "\t        onchange(var)            - invoke action if var changes\n\n"
5722     "\t    The available actions are:\n\n"
5723     "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5724     "\t        save(field,...)                      - save current event fields\n"
5725 #ifdef CONFIG_TRACER_SNAPSHOT
5726     "\t        snapshot()                           - snapshot the trace buffer\n\n"
5727 #endif
5728 #ifdef CONFIG_SYNTH_EVENTS
5729     "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5730     "\t  Write into this file to define/undefine new synthetic events.\n"
5731     "\t     example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5732 #endif
5733 #endif
5734 ;
5735 
5736 static ssize_t
5737 tracing_readme_read(struct file *filp, char __user *ubuf,
5738                size_t cnt, loff_t *ppos)
5739 {
5740     return simple_read_from_buffer(ubuf, cnt, ppos,
5741                     readme_msg, strlen(readme_msg));
5742 }
5743 
5744 static const struct file_operations tracing_readme_fops = {
5745     .open       = tracing_open_generic,
5746     .read       = tracing_readme_read,
5747     .llseek     = generic_file_llseek,
5748 };
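
/*
 * Editor's illustrative sketch, not part of this file: a userspace
 * walk-through of the mini-HOWTO above.  The tracefs path, the
 * sched:sched_switch event and the traceoff:3 trigger are example
 * choices only.
 */
#include <stdio.h>

static void echo(const char *path, const char *val)
{
    FILE *f = fopen(path, "w");

    if (!f) {
        perror(path);
        return;
    }
    fputs(val, f);
    fclose(f);
}

int main(void)
{
    /* Clear the buffer (the O_TRUNC open does the clearing), enable tracing. */
    echo("/sys/kernel/tracing/trace", "");
    echo("/sys/kernel/tracing/tracing_on", "1");

    /* Enable a single event... */
    echo("/sys/kernel/tracing/events/sched/sched_switch/enable", "1");

    /* ...and attach a trigger that stops tracing after three hits. */
    echo("/sys/kernel/tracing/events/sched/sched_switch/trigger",
         "traceoff:3");
    return 0;
}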
5749 
5750 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5751 {
5752     int pid = ++(*pos);
5753 
5754     return trace_find_tgid_ptr(pid);
5755 }
5756 
5757 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5758 {
5759     int pid = *pos;
5760 
5761     return trace_find_tgid_ptr(pid);
5762 }
5763 
5764 static void saved_tgids_stop(struct seq_file *m, void *v)
5765 {
5766 }
5767 
5768 static int saved_tgids_show(struct seq_file *m, void *v)
5769 {
5770     int *entry = (int *)v;
5771     int pid = entry - tgid_map;
5772     int tgid = *entry;
5773 
5774     if (tgid == 0)
5775         return SEQ_SKIP;
5776 
5777     seq_printf(m, "%d %d\n", pid, tgid);
5778     return 0;
5779 }
5780 
5781 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5782     .start      = saved_tgids_start,
5783     .stop       = saved_tgids_stop,
5784     .next       = saved_tgids_next,
5785     .show       = saved_tgids_show,
5786 };
5787 
5788 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5789 {
5790     int ret;
5791 
5792     ret = tracing_check_open_get_tr(NULL);
5793     if (ret)
5794         return ret;
5795 
5796     return seq_open(filp, &tracing_saved_tgids_seq_ops);
5797 }
5798 
5799 
5800 static const struct file_operations tracing_saved_tgids_fops = {
5801     .open       = tracing_saved_tgids_open,
5802     .read       = seq_read,
5803     .llseek     = seq_lseek,
5804     .release    = seq_release,
5805 };
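
/*
 * Editor's illustrative sketch, not part of this file: the saved_tgids
 * file above only has content once the record-tgid option is on (which
 * makes set_tracer_flag() allocate tgid_map).  Paths assume tracefs at
 * /sys/kernel/tracing.
 */
#include <stdio.h>

int main(void)
{
    char line[64];
    FILE *f;

    f = fopen("/sys/kernel/tracing/options/record-tgid", "w");
    if (f) {
        fputs("1", f);
        fclose(f);
    }

    /* Each line is "<pid> <tgid>"; empty slots are skipped via SEQ_SKIP. */
    f = fopen("/sys/kernel/tracing/saved_tgids", "r");
    if (!f)
        return 1;
    while (fgets(line, sizeof(line), f))
        fputs(line, stdout);
    fclose(f);
    return 0;
}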
5806 
5807 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5808 {
5809     unsigned int *ptr = v;
5810 
5811     if (*pos || m->count)
5812         ptr++;
5813 
5814     (*pos)++;
5815 
5816     for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5817          ptr++) {
5818         if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5819             continue;
5820 
5821         return ptr;
5822     }
5823 
5824     return NULL;
5825 }
5826 
5827 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5828 {
5829     void *v;
5830     loff_t l = 0;
5831 
5832     preempt_disable();
5833     arch_spin_lock(&trace_cmdline_lock);
5834 
5835     v = &savedcmd->map_cmdline_to_pid[0];
5836     while (l <= *pos) {
5837         v = saved_cmdlines_next(m, v, &l);
5838         if (!v)
5839             return NULL;
5840     }
5841 
5842     return v;
5843 }
5844 
5845 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5846 {
5847     arch_spin_unlock(&trace_cmdline_lock);
5848     preempt_enable();
5849 }
5850 
5851 static int saved_cmdlines_show(struct seq_file *m, void *v)
5852 {
5853     char buf[TASK_COMM_LEN];
5854     unsigned int *pid = v;
5855 
5856     __trace_find_cmdline(*pid, buf);
5857     seq_printf(m, "%d %s\n", *pid, buf);
5858     return 0;
5859 }
5860 
5861 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5862     .start      = saved_cmdlines_start,
5863     .next       = saved_cmdlines_next,
5864     .stop       = saved_cmdlines_stop,
5865     .show       = saved_cmdlines_show,
5866 };
5867 
5868 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5869 {
5870     int ret;
5871 
5872     ret = tracing_check_open_get_tr(NULL);
5873     if (ret)
5874         return ret;
5875 
5876     return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5877 }
5878 
5879 static const struct file_operations tracing_saved_cmdlines_fops = {
5880     .open       = tracing_saved_cmdlines_open,
5881     .read       = seq_read,
5882     .llseek     = seq_lseek,
5883     .release    = seq_release,
5884 };
5885 
5886 static ssize_t
5887 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5888                  size_t cnt, loff_t *ppos)
5889 {
5890     char buf[64];
5891     int r;
5892 
5893     arch_spin_lock(&trace_cmdline_lock);
5894     r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5895     arch_spin_unlock(&trace_cmdline_lock);
5896 
5897     return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5898 }
5899 
5900 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5901 {
5902     kfree(s->saved_cmdlines);
5903     kfree(s->map_cmdline_to_pid);
5904     kfree(s);
5905 }
5906 
5907 static int tracing_resize_saved_cmdlines(unsigned int val)
5908 {
5909     struct saved_cmdlines_buffer *s, *savedcmd_temp;
5910 
5911     s = kmalloc(sizeof(*s), GFP_KERNEL);
5912     if (!s)
5913         return -ENOMEM;
5914 
5915     if (allocate_cmdlines_buffer(val, s) < 0) {
5916         kfree(s);
5917         return -ENOMEM;
5918     }
5919 
5920     arch_spin_lock(&trace_cmdline_lock);
5921     savedcmd_temp = savedcmd;
5922     savedcmd = s;
5923     arch_spin_unlock(&trace_cmdline_lock);
5924     free_saved_cmdlines_buffer(savedcmd_temp);
5925 
5926     return 0;
5927 }
5928 
5929 static ssize_t
5930 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5931                   size_t cnt, loff_t *ppos)
5932 {
5933     unsigned long val;
5934     int ret;
5935 
5936     ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5937     if (ret)
5938         return ret;
5939 
5940     /* must have at least 1 entry and no more than PID_MAX_DEFAULT entries */
5941     if (!val || val > PID_MAX_DEFAULT)
5942         return -EINVAL;
5943 
5944     ret = tracing_resize_saved_cmdlines((unsigned int)val);
5945     if (ret < 0)
5946         return ret;
5947 
5948     *ppos += cnt;
5949 
5950     return cnt;
5951 }
5952 
5953 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5954     .open       = tracing_open_generic,
5955     .read       = tracing_saved_cmdlines_size_read,
5956     .write      = tracing_saved_cmdlines_size_write,
5957 };
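
/*
 * Editor's illustrative sketch, not part of this file: resizing the saved
 * command-line cache via saved_cmdlines_size (handled above) and dumping
 * the pid->comm pairs.  The tracefs path is an assumption and 1024 is an
 * example size within the 1..PID_MAX_DEFAULT range enforced above.
 */
#include <stdio.h>

int main(void)
{
    char line[128];
    FILE *f;

    f = fopen("/sys/kernel/tracing/saved_cmdlines_size", "w");
    if (f) {
        fputs("1024", f);
        fclose(f);
    }

    /* Each line is "<pid> <comm>", as printed by saved_cmdlines_show(). */
    f = fopen("/sys/kernel/tracing/saved_cmdlines", "r");
    if (!f)
        return 1;
    while (fgets(line, sizeof(line), f))
        fputs(line, stdout);
    fclose(f);
    return 0;
}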
5958 
5959 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5960 static union trace_eval_map_item *
5961 update_eval_map(union trace_eval_map_item *ptr)
5962 {
5963     if (!ptr->map.eval_string) {
5964         if (ptr->tail.next) {
5965             ptr = ptr->tail.next;
5966             /* Set ptr to the next real item (skip head) */
5967             ptr++;
5968         } else
5969             return NULL;
5970     }
5971     return ptr;
5972 }
5973 
5974 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5975 {
5976     union trace_eval_map_item *ptr = v;
5977 
5978     /*
5979      * Paranoid! If ptr points to end, we don't want to increment past it.
5980      * This really should never happen.
5981      */
5982     (*pos)++;
5983     ptr = update_eval_map(ptr);
5984     if (WARN_ON_ONCE(!ptr))
5985         return NULL;
5986 
5987     ptr++;
5988     ptr = update_eval_map(ptr);
5989 
5990     return ptr;
5991 }
5992 
5993 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5994 {
5995     union trace_eval_map_item *v;
5996     loff_t l = 0;
5997 
5998     mutex_lock(&trace_eval_mutex);
5999 
6000     v = trace_eval_maps;
6001     if (v)
6002         v++;
6003 
6004     while (v && l < *pos) {
6005         v = eval_map_next(m, v, &l);
6006     }
6007 
6008     return v;
6009 }
6010 
6011 static void eval_map_stop(struct seq_file *m, void *v)
6012 {
6013     mutex_unlock(&trace_eval_mutex);
6014 }
6015 
6016 static int eval_map_show(struct seq_file *m, void *v)
6017 {
6018     union trace_eval_map_item *ptr = v;
6019 
6020     seq_printf(m, "%s %ld (%s)\n",
6021            ptr->map.eval_string, ptr->map.eval_value,
6022            ptr->map.system);
6023 
6024     return 0;
6025 }
6026 
6027 static const struct seq_operations tracing_eval_map_seq_ops = {
6028     .start      = eval_map_start,
6029     .next       = eval_map_next,
6030     .stop       = eval_map_stop,
6031     .show       = eval_map_show,
6032 };
6033 
6034 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6035 {
6036     int ret;
6037 
6038     ret = tracing_check_open_get_tr(NULL);
6039     if (ret)
6040         return ret;
6041 
6042     return seq_open(filp, &tracing_eval_map_seq_ops);
6043 }
6044 
6045 static const struct file_operations tracing_eval_map_fops = {
6046     .open       = tracing_eval_map_open,
6047     .read       = seq_read,
6048     .llseek     = seq_lseek,
6049     .release    = seq_release,
6050 };
6051 
6052 static inline union trace_eval_map_item *
6053 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6054 {
6055     /* Return tail of array given the head */
6056     return ptr + ptr->head.length + 1;
6057 }
6058 
6059 static void
6060 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6061                int len)
6062 {
6063     struct trace_eval_map **stop;
6064     struct trace_eval_map **map;
6065     union trace_eval_map_item *map_array;
6066     union trace_eval_map_item *ptr;
6067 
6068     stop = start + len;
6069 
6070     /*
6071      * The trace_eval_maps contains the map plus a head and tail item,
6072      * where the head holds the module and the length of the array, and the
6073      * tail holds a pointer to the next list.
6074      */
6075     map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6076     if (!map_array) {
6077         pr_warn("Unable to allocate trace eval mapping\n");
6078         return;
6079     }
6080 
6081     mutex_lock(&trace_eval_mutex);
6082 
6083     if (!trace_eval_maps)
6084         trace_eval_maps = map_array;
6085     else {
6086         ptr = trace_eval_maps;
6087         for (;;) {
6088             ptr = trace_eval_jmp_to_tail(ptr);
6089             if (!ptr->tail.next)
6090                 break;
6091             ptr = ptr->tail.next;
6092 
6093         }
6094         ptr->tail.next = map_array;
6095     }
6096     map_array->head.mod = mod;
6097     map_array->head.length = len;
6098     map_array++;
6099 
6100     for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6101         map_array->map = **map;
6102         map_array++;
6103     }
6104     memset(map_array, 0, sizeof(*map_array));
6105 
6106     mutex_unlock(&trace_eval_mutex);
6107 }
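
/*
 * Editor's illustrative sketch, not part of this file: the array layout
 * built by trace_insert_eval_map_file() above, modelled in standalone C.
 * One "head" slot is followed by @len map slots and one zeroed "tail"
 * slot whose ->tail.next chains to the next block.  The demo_* types and
 * the sample names/values are stand-ins, not the kernel's definitions.
 */
#include <stdio.h>
#include <stdlib.h>

struct demo_map {
    const char *name;
    long value;
};

union demo_item {
    struct { int length; } head;            /* first slot              */
    struct demo_map map;                    /* next head.length slots  */
    struct { union demo_item *next; } tail; /* final, zeroed slot      */
};

static union demo_item *build_block(const struct demo_map *maps, int len)
{
    /* head + len maps + tail, like kmalloc_array(len + 2, ...) above. */
    union demo_item *block = calloc(len + 2, sizeof(*block));
    int i;

    if (!block)
        return NULL;
    block[0].head.length = len;
    for (i = 0; i < len; i++)
        block[i + 1].map = maps[i];
    /* block[len + 1] stays zeroed: it is the tail/terminator. */
    return block;
}

static union demo_item *jmp_to_tail(union demo_item *ptr)
{
    /* Mirrors trace_eval_jmp_to_tail(): skip the head and all map slots. */
    return ptr + ptr->head.length + 1;
}

int main(void)
{
    const struct demo_map maps[] = {
        { "STATE_A", 1 }, { "STATE_B", 2 },
    };
    union demo_item *block = build_block(maps, 2);
    union demo_item *item;

    if (!block)
        return 1;
    /* Walk the map slots until the zeroed tail stops the loop. */
    for (item = block + 1; item->map.name; item++)
        printf("%s %ld\n", item->map.name, item->map.value);
    printf("tail next: %p\n", (void *)jmp_to_tail(block)->tail.next);
    free(block);
    return 0;
}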
6108 
6109 static void trace_create_eval_file(struct dentry *d_tracer)
6110 {
6111     trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6112               NULL, &tracing_eval_map_fops);
6113 }
6114 
6115 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6116 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6117 static inline void trace_insert_eval_map_file(struct module *mod,
6118                   struct trace_eval_map **start, int len) { }
6119 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6120 
6121 static void trace_insert_eval_map(struct module *mod,
6122                   struct trace_eval_map **start, int len)
6123 {
6124     struct trace_eval_map **map;
6125 
6126     if (len <= 0)
6127         return;
6128 
6129     map = start;
6130 
6131     trace_event_eval_update(map, len);
6132 
6133     trace_insert_eval_map_file(mod, start, len);
6134 }
6135 
6136 static ssize_t
6137 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6138                size_t cnt, loff_t *ppos)
6139 {
6140     struct trace_array *tr = filp->private_data;
6141     char buf[MAX_TRACER_SIZE+2];
6142     int r;
6143 
6144     mutex_lock(&trace_types_lock);
6145     r = sprintf(buf, "%s\n", tr->current_trace->name);
6146     mutex_unlock(&trace_types_lock);
6147 
6148     return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6149 }
6150 
6151 int tracer_init(struct tracer *t, struct trace_array *tr)
6152 {
6153     tracing_reset_online_cpus(&tr->array_buffer);
6154     return t->init(tr);
6155 }
6156 
6157 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6158 {
6159     int cpu;
6160 
6161     for_each_tracing_cpu(cpu)
6162         per_cpu_ptr(buf->data, cpu)->entries = val;
6163 }
6164 
6165 #ifdef CONFIG_TRACER_MAX_TRACE
6166 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6167 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6168                     struct array_buffer *size_buf, int cpu_id)
6169 {
6170     int cpu, ret = 0;
6171 
6172     if (cpu_id == RING_BUFFER_ALL_CPUS) {
6173         for_each_tracing_cpu(cpu) {
6174             ret = ring_buffer_resize(trace_buf->buffer,
6175                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6176             if (ret < 0)
6177                 break;
6178             per_cpu_ptr(trace_buf->data, cpu)->entries =
6179                 per_cpu_ptr(size_buf->data, cpu)->entries;
6180         }
6181     } else {
6182         ret = ring_buffer_resize(trace_buf->buffer,
6183                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6184         if (ret == 0)
6185             per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6186                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6187     }
6188 
6189     return ret;
6190 }
6191 #endif /* CONFIG_TRACER_MAX_TRACE */
6192 
6193 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6194                     unsigned long size, int cpu)
6195 {
6196     int ret;
6197 
6198     /*
6199      * If kernel or user changes the size of the ring buffer
6200      * we use the size that was given, and we can forget about
6201      * expanding it later.
6202      */
6203     ring_buffer_expanded = true;
6204 
6205     /* May be called before buffers are initialized */
6206     if (!tr->array_buffer.buffer)
6207         return 0;
6208 
6209     ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6210     if (ret < 0)
6211         return ret;
6212 
6213 #ifdef CONFIG_TRACER_MAX_TRACE
6214     if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6215         !tr->current_trace->use_max_tr)
6216         goto out;
6217 
6218     ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6219     if (ret < 0) {
6220         int r = resize_buffer_duplicate_size(&tr->array_buffer,
6221                              &tr->array_buffer, cpu);
6222         if (r < 0) {
6223             /*
6224              * AARGH! We are left with a different
6225              * sized max buffer!!!!
6226              * The max buffer is our "snapshot" buffer.
6227              * When a tracer needs a snapshot (one of the
6228              * latency tracers), it swaps the max buffer
6229              * with the saved snapshot. We succeeded in updating
6230              * the size of the main buffer, but failed to
6231              * update the size of the max buffer. But when we tried
6232              * to reset the main buffer to the original size, we
6233              * failed there too. This is very unlikely to
6234              * happen, but if it does, warn and kill all
6235              * tracing.
6236              */
6237             WARN_ON(1);
6238             tracing_disabled = 1;
6239         }
6240         return ret;
6241     }
6242 
6243     if (cpu == RING_BUFFER_ALL_CPUS)
6244         set_buffer_entries(&tr->max_buffer, size);
6245     else
6246         per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6247 
6248  out:
6249 #endif /* CONFIG_TRACER_MAX_TRACE */
6250 
6251     if (cpu == RING_BUFFER_ALL_CPUS)
6252         set_buffer_entries(&tr->array_buffer, size);
6253     else
6254         per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6255 
6256     return ret;
6257 }
6258 
6259 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6260                   unsigned long size, int cpu_id)
6261 {
6262     int ret;
6263 
6264     mutex_lock(&trace_types_lock);
6265 
6266     if (cpu_id != RING_BUFFER_ALL_CPUS) {
6267         /* make sure this cpu is enabled in the mask */
6268         if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6269             ret = -EINVAL;
6270             goto out;
6271         }
6272     }
6273 
6274     ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6275     if (ret < 0)
6276         ret = -ENOMEM;
6277 
6278 out:
6279     mutex_unlock(&trace_types_lock);
6280 
6281     return ret;
6282 }
6283 
6284 
6285 /**
6286  * tracing_update_buffers - used by tracing facility to expand ring buffers
6287  *
6288  * To save memory when tracing is configured in but never used, the
6289  * ring buffers are initially set to a minimum size. Once a user starts
6290  * to use the tracing facility, they need to grow to their default
6291  * size.
6292  *
6293  * This function is to be called when a tracer is about to be used.
6294  */
6295 int tracing_update_buffers(void)
6296 {
6297     int ret = 0;
6298 
6299     mutex_lock(&trace_types_lock);
6300     if (!ring_buffer_expanded)
6301         ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6302                         RING_BUFFER_ALL_CPUS);
6303     mutex_unlock(&trace_types_lock);
6304 
6305     return ret;
6306 }
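
/*
 * Editor's illustrative sketch, not part of this file: the same expansion
 * can be requested from userspace by writing to buffer_size_kb, which
 * ends up in tracing_resize_ring_buffer() above.  The tracefs path and
 * the 4096 kB per-CPU size are example assumptions.
 */
#include <stdio.h>

int main(void)
{
    char total[32] = "";
    FILE *f;

    f = fopen("/sys/kernel/tracing/buffer_size_kb", "w");
    if (f) {
        fputs("4096", f);   /* per-CPU buffer size, in KB */
        fclose(f);
    }

    f = fopen("/sys/kernel/tracing/buffer_total_size_kb", "r");
    if (f && fgets(total, sizeof(total), f))
        printf("total size (kb): %s", total);
    if (f)
        fclose(f);
    return 0;
}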
6307 
6308 struct trace_option_dentry;
6309 
6310 static void
6311 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6312 
6313 /*
6314  * Used to clear out the tracer before deletion of an instance.
6315  * Must have trace_types_lock held.
6316  */
6317 static void tracing_set_nop(struct trace_array *tr)
6318 {
6319     if (tr->current_trace == &nop_trace)
6320         return;
6321     
6322     tr->current_trace->enabled--;
6323 
6324     if (tr->current_trace->reset)
6325         tr->current_trace->reset(tr);
6326 
6327     tr->current_trace = &nop_trace;
6328 }
6329 
6330 static bool tracer_options_updated;
6331 
6332 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6333 {
6334     /* Only enable if the directory has been created already. */
6335     if (!tr->dir)
6336         return;
6337 
6338     /* Only create trace option files after update_tracer_options finishes */
6339     if (!tracer_options_updated)
6340         return;
6341 
6342     create_trace_option_files(tr, t);
6343 }
6344 
6345 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6346 {
6347     struct tracer *t;
6348 #ifdef CONFIG_TRACER_MAX_TRACE
6349     bool had_max_tr;
6350 #endif
6351     int ret = 0;
6352 
6353     mutex_lock(&trace_types_lock);
6354 
6355     if (!ring_buffer_expanded) {
6356         ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6357                         RING_BUFFER_ALL_CPUS);
6358         if (ret < 0)
6359             goto out;
6360         ret = 0;
6361     }
6362 
6363     for (t = trace_types; t; t = t->next) {
6364         if (strcmp(t->name, buf) == 0)
6365             break;
6366     }
6367     if (!t) {
6368         ret = -EINVAL;
6369         goto out;
6370     }
6371     if (t == tr->current_trace)
6372         goto out;
6373 
6374 #ifdef CONFIG_TRACER_SNAPSHOT
6375     if (t->use_max_tr) {
6376         arch_spin_lock(&tr->max_lock);
6377         if (tr->cond_snapshot)
6378             ret = -EBUSY;
6379         arch_spin_unlock(&tr->max_lock);
6380         if (ret)
6381             goto out;
6382     }
6383 #endif
6384     /* Some tracers won't work on kernel command line */
6385     if (system_state < SYSTEM_RUNNING && t->noboot) {
6386         pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6387             t->name);
6388         goto out;
6389     }
6390 
6391     /* Some tracers are only allowed for the top level buffer */
6392     if (!trace_ok_for_array(t, tr)) {
6393         ret = -EINVAL;
6394         goto out;
6395     }
6396 
6397     /* If trace pipe files are being read, we can't change the tracer */
6398     if (tr->trace_ref) {
6399         ret = -EBUSY;
6400         goto out;
6401     }
6402 
6403     trace_branch_disable();
6404 
6405     tr->current_trace->enabled--;
6406 
6407     if (tr->current_trace->reset)
6408         tr->current_trace->reset(tr);
6409 
6410     /* Current trace needs to be nop_trace before synchronize_rcu */
6411     tr->current_trace = &nop_trace;
6412 
6413 #ifdef CONFIG_TRACER_MAX_TRACE
6414     had_max_tr = tr->allocated_snapshot;
6415 
6416     if (had_max_tr && !t->use_max_tr) {
6417         /*
6418          * We need to make sure that the update_max_tr sees that
6419          * current_trace changed to nop_trace to keep it from
6420          * swapping the buffers after we resize it.
6421          * The update_max_tr is called with interrupts disabled,
6422          * so the synchronize_rcu() below is sufficient.
6423          */
6424         synchronize_rcu();
6425         free_snapshot(tr);
6426     }
6427 
6428     if (t->use_max_tr && !had_max_tr) {
6429         ret = tracing_alloc_snapshot_instance(tr);
6430         if (ret < 0)
6431             goto out;
6432     }
6433 #endif
6434 
6435     if (t->init) {
6436         ret = tracer_init(t, tr);
6437         if (ret)
6438             goto out;
6439     }
6440 
6441     tr->current_trace = t;
6442     tr->current_trace->enabled++;
6443     trace_branch_enable(tr);
6444  out:
6445     mutex_unlock(&trace_types_lock);
6446 
6447     return ret;
6448 }
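
/*
 * Editor's illustrative sketch, not part of this file: selecting a tracer
 * from userspace, which reaches tracing_set_tracer() through the write
 * handler below.  The tracefs path is an assumption and "function" must
 * be listed in available_tracers on the running kernel.
 */
#include <stdio.h>

int main(void)
{
    char name[64] = "";
    FILE *f;

    f = fopen("/sys/kernel/tracing/current_tracer", "w");
    if (!f) {
        perror("current_tracer");
        return 1;
    }
    fputs("function\n", f); /* trailing newline is stripped by strim() */
    fclose(f);

    f = fopen("/sys/kernel/tracing/current_tracer", "r");
    if (f && fgets(name, sizeof(name), f))
        printf("current_tracer: %s", name);
    if (f)
        fclose(f);
    return 0;
}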
6449 
6450 static ssize_t
6451 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6452             size_t cnt, loff_t *ppos)
6453 {
6454     struct trace_array *tr = filp->private_data;
6455     char buf[MAX_TRACER_SIZE+1];
6456     char *name;
6457     size_t ret;
6458     int err;
6459 
6460     ret = cnt;
6461 
6462     if (cnt > MAX_TRACER_SIZE)
6463         cnt = MAX_TRACER_SIZE;
6464 
6465     if (copy_from_user(buf, ubuf, cnt))
6466         return -EFAULT;
6467 
6468     buf[cnt] = 0;
6469 
6470     name = strim(buf);
6471 
6472     err = tracing_set_tracer(tr, name);
6473     if (err)
6474         return err;
6475 
6476     *ppos += ret;
6477 
6478     return ret;
6479 }
6480 
6481 static ssize_t
6482 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6483            size_t cnt, loff_t *ppos)
6484 {
6485     char buf[64];
6486     int r;
6487 
6488     r = snprintf(buf, sizeof(buf), "%ld\n",
6489              *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6490     if (r > sizeof(buf))
6491         r = sizeof(buf);
6492     return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6493 }
6494 
6495 static ssize_t
6496 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6497             size_t cnt, loff_t *ppos)
6498 {
6499     unsigned long val;
6500     int ret;
6501 
6502     ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6503     if (ret)
6504         return ret;
6505 
6506     *ptr = val * 1000;
6507 
6508     return cnt;
6509 }
6510 
6511 static ssize_t
6512 tracing_thresh_read(struct file *filp, char __user *ubuf,
6513             size_t cnt, loff_t *ppos)
6514 {
6515     return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6516 }
6517 
6518 static ssize_t
6519 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6520              size_t cnt, loff_t *ppos)
6521 {
6522     struct trace_array *tr = filp->private_data;
6523     int ret;
6524 
6525     mutex_lock(&trace_types_lock);
6526     ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6527     if (ret < 0)
6528         goto out;
6529 
6530     if (tr->current_trace->update_thresh) {
6531         ret = tr->current_trace->update_thresh(tr);
6532         if (ret < 0)
6533             goto out;
6534     }
6535 
6536     ret = cnt;
6537 out:
6538     mutex_unlock(&trace_types_lock);
6539 
6540     return ret;
6541 }
6542 
6543 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6544 
6545 static ssize_t
6546 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6547              size_t cnt, loff_t *ppos)
6548 {
6549     return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6550 }
6551 
6552 static ssize_t
6553 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6554               size_t cnt, loff_t *ppos)
6555 {
6556     return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6557 }
6558 
6559 #endif
6560 
6561 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6562 {
6563     struct trace_array *tr = inode->i_private;
6564     struct trace_iterator *iter;
6565     int ret;
6566 
6567     ret = tracing_check_open_get_tr(tr);
6568     if (ret)
6569         return ret;
6570 
6571     mutex_lock(&trace_types_lock);
6572 
6573     /* create a buffer to store the information to pass to userspace */
6574     iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6575     if (!iter) {
6576         ret = -ENOMEM;
6577         __trace_array_put(tr);
6578         goto out;
6579     }
6580 
6581     trace_seq_init(&iter->seq);
6582     iter->trace = tr->current_trace;
6583 
6584     if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6585         ret = -ENOMEM;
6586         goto fail;
6587     }
6588 
6589     /* trace pipe does not show start of buffer */
6590     cpumask_setall(iter->started);
6591 
6592     if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6593         iter->iter_flags |= TRACE_FILE_LAT_FMT;
6594 
6595     /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6596     if (trace_clocks[tr->clock_id].in_ns)
6597         iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6598 
6599     iter->tr = tr;
6600     iter->array_buffer = &tr->array_buffer;
6601     iter->cpu_file = tracing_get_cpu(inode);
6602     mutex_init(&iter->mutex);
6603     filp->private_data = iter;
6604 
6605     if (iter->trace->pipe_open)
6606         iter->trace->pipe_open(iter);
6607 
6608     nonseekable_open(inode, filp);
6609 
6610     tr->trace_ref++;
6611 out:
6612     mutex_unlock(&trace_types_lock);
6613     return ret;
6614 
6615 fail:
6616     kfree(iter);
6617     __trace_array_put(tr);
6618     mutex_unlock(&trace_types_lock);
6619     return ret;
6620 }
6621 
6622 static int tracing_release_pipe(struct inode *inode, struct file *file)
6623 {
6624     struct trace_iterator *iter = file->private_data;
6625     struct trace_array *tr = inode->i_private;
6626 
6627     mutex_lock(&trace_types_lock);
6628 
6629     tr->trace_ref--;
6630 
6631     if (iter->trace->pipe_close)
6632         iter->trace->pipe_close(iter);
6633 
6634     mutex_unlock(&trace_types_lock);
6635 
6636     free_cpumask_var(iter->started);
6637     mutex_destroy(&iter->mutex);
6638     kfree(iter);
6639 
6640     trace_array_put(tr);
6641 
6642     return 0;
6643 }
6644 
6645 static __poll_t
6646 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6647 {
6648     struct trace_array *tr = iter->tr;
6649 
6650     /* Iterators are static, they should be filled or empty */
6651     if (trace_buffer_iter(iter, iter->cpu_file))
6652         return EPOLLIN | EPOLLRDNORM;
6653 
6654     if (tr->trace_flags & TRACE_ITER_BLOCK)
6655         /*
6656          * Always select as readable when in blocking mode
6657          */
6658         return EPOLLIN | EPOLLRDNORM;
6659     else
6660         return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6661                          filp, poll_table);
6662 }
6663 
6664 static __poll_t
6665 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6666 {
6667     struct trace_iterator *iter = filp->private_data;
6668 
6669     return trace_poll(iter, filp, poll_table);
6670 }
6671 
6672 /* Must be called with iter->mutex held. */
6673 static int tracing_wait_pipe(struct file *filp)
6674 {
6675     struct trace_iterator *iter = filp->private_data;
6676     int ret;
6677 
6678     while (trace_empty(iter)) {
6679 
6680         if ((filp->f_flags & O_NONBLOCK)) {
6681             return -EAGAIN;
6682         }
6683 
6684         /*
6685          * We block until we read something and tracing is disabled.
6686          * We still block if tracing is disabled, but we have never
6687          * read anything. This allows a user to cat this file, and
6688          * then enable tracing. But after we have read something,
6689          * we give an EOF when tracing is again disabled.
6690          *
6691          * iter->pos will be 0 if we haven't read anything.
6692          */
6693         if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6694             break;
6695 
6696         mutex_unlock(&iter->mutex);
6697 
6698         ret = wait_on_pipe(iter, 0);
6699 
6700         mutex_lock(&iter->mutex);
6701 
6702         if (ret)
6703             return ret;
6704     }
6705 
6706     return 1;
6707 }
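
/*
 * Editor's illustrative sketch, not part of this file: a consuming
 * trace_pipe reader.  A blocking read() sleeps in tracing_wait_pipe()
 * until entries arrive; with O_NONBLOCK the read returns -EAGAIN, so the
 * reader can wait in poll() instead (served by tracing_poll_pipe()).
 * The tracefs path is an assumption.
 */
#include <stdio.h>
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <unistd.h>

int main(void)
{
    struct pollfd pfd;
    char buf[4096];
    ssize_t n;
    int fd;

    fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY | O_NONBLOCK);
    if (fd < 0) {
        perror("trace_pipe");
        return 1;
    }
    pfd.fd = fd;
    pfd.events = POLLIN;

    for (;;) {
        /* Wait until the pipe reports readable, then consume entries. */
        if (poll(&pfd, 1, -1) < 0)
            break;
        n = read(fd, buf, sizeof(buf));
        if (n > 0)
            fwrite(buf, 1, n, stdout);
        else if (n < 0 && errno != EAGAIN)
            break;
    }
    close(fd);
    return 0;
}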
6708 
6709 /*
6710  * Consumer reader.
6711  */
6712 static ssize_t
6713 tracing_read_pipe(struct file *filp, char __user *ubuf,
6714           size_t cnt, loff_t *ppos)
6715 {
6716     struct trace_iterator *iter = filp->private_data;
6717     ssize_t sret;
6718 
6719     /*
6720      * Avoid more than one consumer on a single file descriptor.
6721      * This is just a matter of trace coherency; the ring buffer itself
6722      * is protected.
6723      */
6724     mutex_lock(&iter->mutex);
6725 
6726     /* return any leftover data */
6727     sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6728     if (sret != -EBUSY)
6729         goto out;
6730 
6731     trace_seq_init(&iter->seq);
6732 
6733     if (iter->trace->read) {
6734         sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6735         if (sret)
6736             goto out;
6737     }
6738 
6739 waitagain:
6740     sret = tracing_wait_pipe(filp);
6741     if (sret <= 0)
6742         goto out;
6743 
6744     /* stop when tracing is finished */
6745     if (trace_empty(iter)) {
6746         sret = 0;
6747         goto out;
6748     }
6749 
6750     if (cnt >= PAGE_SIZE)
6751         cnt = PAGE_SIZE - 1;
6752 
6753     /* reset all but tr, trace, and overruns */
6754     trace_iterator_reset(iter);
6755     cpumask_clear(iter->started);
6756     trace_seq_init(&iter->seq);
6757 
6758     trace_event_read_lock();
6759     trace_access_lock(iter->cpu_file);
6760     while (trace_find_next_entry_inc(iter) != NULL) {
6761         enum print_line_t ret;
6762         int save_len = iter->seq.seq.len;
6763 
6764         ret = print_trace_line(iter);
6765         if (ret == TRACE_TYPE_PARTIAL_LINE) {
6766             /* don't print partial lines */
6767             iter->seq.seq.len = save_len;
6768             break;
6769         }
6770         if (ret != TRACE_TYPE_NO_CONSUME)
6771             trace_consume(iter);
6772 
6773         if (trace_seq_used(&iter->seq) >= cnt)
6774             break;
6775 
6776         /*
6777          * Setting the full flag means we reached the trace_seq buffer
6778          * size and we should have left via the partial output condition above.
6779          * One of the trace_seq_* functions is not used properly.
6780          */
6781         WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6782               iter->ent->type);
6783     }
6784     trace_access_unlock(iter->cpu_file);
6785     trace_event_read_unlock();
6786 
6787     /* Now copy what we have to the user */
6788     sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6789     if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6790         trace_seq_init(&iter->seq);
6791 
6792     /*
6793      * If there was nothing to send to the user, in spite of consuming trace
6794      * entries, go back to wait for more entries.
6795      */
6796     if (sret == -EBUSY)
6797         goto waitagain;
6798 
6799 out:
6800     mutex_unlock(&iter->mutex);
6801 
6802     return sret;
6803 }
6804 
6805 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6806                      unsigned int idx)
6807 {
6808     __free_page(spd->pages[idx]);
6809 }
6810 
6811 static size_t
6812 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6813 {
6814     size_t count;
6815     int save_len;
6816     int ret;
6817 
6818     /* Seq buffer is page-sized, exactly what we need. */
6819     for (;;) {
6820         save_len = iter->seq.seq.len;
6821         ret = print_trace_line(iter);
6822 
6823         if (trace_seq_has_overflowed(&iter->seq)) {
6824             iter->seq.seq.len = save_len;
6825             break;
6826         }
6827 
6828         /*
6829          * This should not be hit, because TRACE_TYPE_PARTIAL_LINE should
6830          * only be returned if iter->seq overflowed. But check it
6831          * anyway to be safe.
6832          */
6833         if (ret == TRACE_TYPE_PARTIAL_LINE) {
6834             iter->seq.seq.len = save_len;
6835             break;
6836         }
6837 
6838         count = trace_seq_used(&iter->seq) - save_len;
6839         if (rem < count) {
6840             rem = 0;
6841             iter->seq.seq.len = save_len;
6842             break;
6843         }
6844 
6845         if (ret != TRACE_TYPE_NO_CONSUME)
6846             trace_consume(iter);
6847         rem -= count;
6848         if (!trace_find_next_entry_inc(iter))   {
6849             rem = 0;
6850             iter->ent = NULL;
6851             break;
6852         }
6853     }
6854 
6855     return rem;
6856 }
6857 
6858 static ssize_t tracing_splice_read_pipe(struct file *filp,
6859                     loff_t *ppos,
6860                     struct pipe_inode_info *pipe,
6861                     size_t len,
6862                     unsigned int flags)
6863 {
6864     struct page *pages_def[PIPE_DEF_BUFFERS];
6865     struct partial_page partial_def[PIPE_DEF_BUFFERS];
6866     struct trace_iterator *iter = filp->private_data;
6867     struct splice_pipe_desc spd = {
6868         .pages      = pages_def,
6869         .partial    = partial_def,
6870         .nr_pages   = 0, /* This gets updated below. */
6871         .nr_pages_max   = PIPE_DEF_BUFFERS,
6872         .ops        = &default_pipe_buf_ops,
6873         .spd_release    = tracing_spd_release_pipe,
6874     };
6875     ssize_t ret;
6876     size_t rem;
6877     unsigned int i;
6878 
6879     if (splice_grow_spd(pipe, &spd))
6880         return -ENOMEM;
6881 
6882     mutex_lock(&iter->mutex);
6883 
6884     if (iter->trace->splice_read) {
6885         ret = iter->trace->splice_read(iter, filp,
6886                            ppos, pipe, len, flags);
6887         if (ret)
6888             goto out_err;
6889     }
6890 
6891     ret = tracing_wait_pipe(filp);
6892     if (ret <= 0)
6893         goto out_err;
6894 
6895     if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6896         ret = -EFAULT;
6897         goto out_err;
6898     }
6899 
6900     trace_event_read_lock();
6901     trace_access_lock(iter->cpu_file);
6902 
6903     /* Fill as many pages as possible. */
6904     for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6905         spd.pages[i] = alloc_page(GFP_KERNEL);
6906         if (!spd.pages[i])
6907             break;
6908 
6909         rem = tracing_fill_pipe_page(rem, iter);
6910 
6911         /* Copy the data into the page, so we can start over. */
6912         ret = trace_seq_to_buffer(&iter->seq,
6913                       page_address(spd.pages[i]),
6914                       trace_seq_used(&iter->seq));
6915         if (ret < 0) {
6916             __free_page(spd.pages[i]);
6917             break;
6918         }
6919         spd.partial[i].offset = 0;
6920         spd.partial[i].len = trace_seq_used(&iter->seq);
6921 
6922         trace_seq_init(&iter->seq);
6923     }
6924 
6925     trace_access_unlock(iter->cpu_file);
6926     trace_event_read_unlock();
6927     mutex_unlock(&iter->mutex);
6928 
6929     spd.nr_pages = i;
6930 
6931     if (i)
6932         ret = splice_to_pipe(pipe, &spd);
6933     else
6934         ret = 0;
6935 out:
6936     splice_shrink_spd(&spd);
6937     return ret;
6938 
6939 out_err:
6940     mutex_unlock(&iter->mutex);
6941     goto out;
6942 }
6943 
6944 static ssize_t
6945 tracing_entries_read(struct file *filp, char __user *ubuf,
6946              size_t cnt, loff_t *ppos)
6947 {
6948     struct inode *inode = file_inode(filp);
6949     struct trace_array *tr = inode->i_private;
6950     int cpu = tracing_get_cpu(inode);
6951     char buf[64];
6952     int r = 0;
6953     ssize_t ret;
6954 
6955     mutex_lock(&trace_types_lock);
6956 
6957     if (cpu == RING_BUFFER_ALL_CPUS) {
6958         int cpu, buf_size_same;
6959         unsigned long size;
6960 
6961         size = 0;
6962         buf_size_same = 1;
6963         /* check if all CPU buffer sizes are the same */
6964         for_each_tracing_cpu(cpu) {
6965             /* fill in the size from first enabled cpu */
6966             if (size == 0)
6967                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6968             if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6969                 buf_size_same = 0;
6970                 break;
6971             }
6972         }
6973 
6974         if (buf_size_same) {
6975             if (!ring_buffer_expanded)
6976                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6977                         size >> 10,
6978                         trace_buf_size >> 10);
6979             else
6980                 r = sprintf(buf, "%lu\n", size >> 10);
6981         } else
6982             r = sprintf(buf, "X\n");
6983     } else
6984         r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6985 
6986     mutex_unlock(&trace_types_lock);
6987 
6988     ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6989     return ret;
6990 }
6991 
6992 static ssize_t
6993 tracing_entries_write(struct file *filp, const char __user *ubuf,
6994               size_t cnt, loff_t *ppos)
6995 {
6996     struct inode *inode = file_inode(filp);
6997     struct trace_array *tr = inode->i_private;
6998     unsigned long val;
6999     int ret;
7000 
7001     ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7002     if (ret)
7003         return ret;
7004 
7005     /* must have at least 1 entry */
7006     if (!val)
7007         return -EINVAL;
7008 
7009     /* value is in KB */
7010     val <<= 10;
7011     ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7012     if (ret < 0)
7013         return ret;
7014 
7015     *ppos += cnt;
7016 
7017     return cnt;
7018 }
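     /*
      * Usage note (illustrative, assuming tracefs is mounted at
      * /sys/kernel/tracing): the value written is taken in KB and resizes
      * either every per-CPU buffer or a single CPU's buffer:
      *
      *   # echo 2048 > /sys/kernel/tracing/buffer_size_kb
      *   # echo 1024 > /sys/kernel/tracing/per_cpu/cpu1/buffer_size_kb
      */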
7019 
7020 static ssize_t
7021 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7022                 size_t cnt, loff_t *ppos)
7023 {
7024     struct trace_array *tr = filp->private_data;
7025     char buf[64];
7026     int r, cpu;
7027     unsigned long size = 0, expanded_size = 0;
7028 
7029     mutex_lock(&trace_types_lock);
7030     for_each_tracing_cpu(cpu) {
7031         size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7032         if (!ring_buffer_expanded)
7033             expanded_size += trace_buf_size >> 10;
7034     }
7035     if (ring_buffer_expanded)
7036         r = sprintf(buf, "%lu\n", size);
7037     else
7038         r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7039     mutex_unlock(&trace_types_lock);
7040 
7041     return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7042 }
7043 
7044 static ssize_t
7045 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7046               size_t cnt, loff_t *ppos)
7047 {
7048     /*
7049      * There is no need to read what the user has written; this function
7050      * only exists so that using "echo" on this file does not return an error.
7051      */
7052 
7053     *ppos += cnt;
7054 
7055     return cnt;
7056 }
7057 
7058 static int
7059 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7060 {
7061     struct trace_array *tr = inode->i_private;
7062 
7063     /* disable tracing ? */
7064     if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7065         tracer_tracing_off(tr);
7066     /* resize the ring buffer to 0 */
7067     tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7068 
7069     trace_array_put(tr);
7070 
7071     return 0;
7072 }
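     /*
      * Usage note (illustrative, assuming tracefs is mounted at
      * /sys/kernel/tracing): closing free_buffer shrinks the ring buffer
      * to zero, and with the stop-on-free flag set it also turns tracing
      * off, so a plain write-and-close is enough:
      *
      *   # echo > /sys/kernel/tracing/free_buffer
      */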
7073 
7074 static ssize_t
7075 tracing_mark_write(struct file *filp, const char __user *ubuf,
7076                     size_t cnt, loff_t *fpos)
7077 {
7078     struct trace_array *tr = filp->private_data;
7079     struct ring_buffer_event *event;
7080     enum event_trigger_type tt = ETT_NONE;
7081     struct trace_buffer *buffer;
7082     struct print_entry *entry;
7083     ssize_t written;
7084     int size;
7085     int len;
7086 
7087 /* Used in tracing_mark_raw_write() as well */
7088 #define FAULTED_STR "<faulted>"
7089 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7090 
7091     if (tracing_disabled)
7092         return -EINVAL;
7093 
7094     if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7095         return -EINVAL;
7096 
7097     if (cnt > TRACE_BUF_SIZE)
7098         cnt = TRACE_BUF_SIZE;
7099 
7100     BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7101 
7102     size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7103 
7104     /* If shorter than "<faulted>", make sure we can still add that string */
7105     if (cnt < FAULTED_SIZE)
7106         size += FAULTED_SIZE - cnt;
7107 
7108     buffer = tr->array_buffer.buffer;
7109     event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7110                         tracing_gen_ctx());
7111     if (unlikely(!event))
7112         /* Ring buffer disabled, return as if not open for write */
7113         return -EBADF;
7114 
7115     entry = ring_buffer_event_data(event);
7116     entry->ip = _THIS_IP_;
7117 
7118     len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7119     if (len) {
7120         memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7121         cnt = FAULTED_SIZE;
7122         written = -EFAULT;
7123     } else
7124         written = cnt;
7125 
7126     if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7127         /* do not add \n before testing triggers, but add \0 */
7128         entry->buf[cnt] = '\0';
7129         tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7130     }
7131 
7132     if (entry->buf[cnt - 1] != '\n') {
7133         entry->buf[cnt] = '\n';
7134         entry->buf[cnt + 1] = '\0';
7135     } else
7136         entry->buf[cnt] = '\0';
7137 
7138     if (static_branch_unlikely(&trace_marker_exports_enabled))
7139         ftrace_exports(event, TRACE_EXPORT_MARKER);
7140     __buffer_unlock_commit(buffer, event);
7141 
7142     if (tt)
7143         event_triggers_post_call(tr->trace_marker_file, tt);
7144 
7145     return written;
7146 }
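     /*
      * Usage note (illustrative, assuming tracefs is mounted at
      * /sys/kernel/tracing): user space can annotate the trace by writing
      * plain text to trace_marker; a trailing newline is added if missing:
      *
      *   # echo "hit the slow path" > /sys/kernel/tracing/trace_marker
      */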
7147 
7148 /* Limit it for now to 3K (including tag) */
7149 #define RAW_DATA_MAX_SIZE (1024*3)
7150 
7151 static ssize_t
7152 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7153                     size_t cnt, loff_t *fpos)
7154 {
7155     struct trace_array *tr = filp->private_data;
7156     struct ring_buffer_event *event;
7157     struct trace_buffer *buffer;
7158     struct raw_data_entry *entry;
7159     ssize_t written;
7160     int size;
7161     int len;
7162 
7163 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7164 
7165     if (tracing_disabled)
7166         return -EINVAL;
7167 
7168     if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7169         return -EINVAL;
7170 
7171     /* The marker must at least have a tag id */
7172     if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7173         return -EINVAL;
7174 
7175     if (cnt > TRACE_BUF_SIZE)
7176         cnt = TRACE_BUF_SIZE;
7177 
7178     BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7179 
7180     size = sizeof(*entry) + cnt;
7181     if (cnt < FAULT_SIZE_ID)
7182         size += FAULT_SIZE_ID - cnt;
7183 
7184     buffer = tr->array_buffer.buffer;
7185     event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7186                         tracing_gen_ctx());
7187     if (!event)
7188         /* Ring buffer disabled, return as if not open for write */
7189         return -EBADF;
7190 
7191     entry = ring_buffer_event_data(event);
7192 
7193     len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7194     if (len) {
7195         entry->id = -1;
7196         memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7197         written = -EFAULT;
7198     } else
7199         written = cnt;
7200 
7201     __buffer_unlock_commit(buffer, event);
7202 
7203     return written;
7204 }
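     /*
      * Usage note (illustrative): trace_marker_raw expects a binary
      * payload that starts with a 4-byte tag id, e.g. from C with
      * made-up values:
      *
      *   struct { unsigned int id; char data[8]; } raw = { 42, "payload" };
      *   write(fd, &raw, sizeof(raw));
      *
      * where fd is an open file descriptor for trace_marker_raw.
      */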
7205 
7206 static int tracing_clock_show(struct seq_file *m, void *v)
7207 {
7208     struct trace_array *tr = m->private;
7209     int i;
7210 
7211     for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7212         seq_printf(m,
7213             "%s%s%s%s", i ? " " : "",
7214             i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7215             i == tr->clock_id ? "]" : "");
7216     seq_putc(m, '\n');
7217 
7218     return 0;
7219 }
7220 
7221 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7222 {
7223     int i;
7224 
7225     for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7226         if (strcmp(trace_clocks[i].name, clockstr) == 0)
7227             break;
7228     }
7229     if (i == ARRAY_SIZE(trace_clocks))
7230         return -EINVAL;
7231 
7232     mutex_lock(&trace_types_lock);
7233 
7234     tr->clock_id = i;
7235 
7236     ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7237 
7238     /*
7239      * The new clock may not be consistent with the previous clock.
7240      * Reset the buffer so that it doesn't contain incomparable timestamps.
7241      */
7242     tracing_reset_online_cpus(&tr->array_buffer);
7243 
7244 #ifdef CONFIG_TRACER_MAX_TRACE
7245     if (tr->max_buffer.buffer)
7246         ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7247     tracing_reset_online_cpus(&tr->max_buffer);
7248 #endif
7249 
7250     mutex_unlock(&trace_types_lock);
7251 
7252     return 0;
7253 }
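     /*
      * Usage note (illustrative, assuming tracefs is mounted at
      * /sys/kernel/tracing): reading trace_clock lists the available
      * clocks with the current one in brackets; switching clocks resets
      * the buffers, as noted above:
      *
      *   # cat /sys/kernel/tracing/trace_clock
      *   # echo global > /sys/kernel/tracing/trace_clock
      */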
7254 
7255 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7256                    size_t cnt, loff_t *fpos)
7257 {
7258     struct seq_file *m = filp->private_data;
7259     struct trace_array *tr = m->private;
7260     char buf[64];
7261     const char *clockstr;
7262     int ret;
7263 
7264     if (cnt >= sizeof(buf))
7265         return -EINVAL;
7266 
7267     if (copy_from_user(buf, ubuf, cnt))
7268         return -EFAULT;
7269 
7270     buf[cnt] = 0;
7271 
7272     clockstr = strstrip(buf);
7273 
7274     ret = tracing_set_clock(tr, clockstr);
7275     if (ret)
7276         return ret;
7277 
7278     *fpos += cnt;
7279 
7280     return cnt;
7281 }
7282 
7283 static int tracing_clock_open(struct inode *inode, struct file *file)
7284 {
7285     struct trace_array *tr = inode->i_private;
7286     int ret;
7287 
7288     ret = tracing_check_open_get_tr(tr);
7289     if (ret)
7290         return ret;
7291 
7292     ret = single_open(file, tracing_clock_show, inode->i_private);
7293     if (ret < 0)
7294         trace_array_put(tr);
7295 
7296     return ret;
7297 }
7298 
7299 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7300 {
7301     struct trace_array *tr = m->private;
7302 
7303     mutex_lock(&trace_types_lock);
7304 
7305     if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7306         seq_puts(m, "delta [absolute]\n");
7307     else
7308         seq_puts(m, "[delta] absolute\n");
7309 
7310     mutex_unlock(&trace_types_lock);
7311 
7312     return 0;
7313 }
7314 
7315 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7316 {
7317     struct trace_array *tr = inode->i_private;
7318     int ret;
7319 
7320     ret = tracing_check_open_get_tr(tr);
7321     if (ret)
7322         return ret;
7323 
7324     ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7325     if (ret < 0)
7326         trace_array_put(tr);
7327 
7328     return ret;
7329 }
7330 
7331 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7332 {
7333     if (rbe == this_cpu_read(trace_buffered_event))
7334         return ring_buffer_time_stamp(buffer);
7335 
7336     return ring_buffer_event_time_stamp(buffer, rbe);
7337 }
7338 
7339 /*
7340  * Enable or disable use of the per-CPU trace_buffered_event when possible.
7341  */
7342 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7343 {
7344     int ret = 0;
7345 
7346     mutex_lock(&trace_types_lock);
7347 
7348     if (set && tr->no_filter_buffering_ref++)
7349         goto out;
7350 
7351     if (!set) {
7352         if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7353             ret = -EINVAL;
7354             goto out;
7355         }
7356 
7357         --tr->no_filter_buffering_ref;
7358     }
7359  out:
7360     mutex_unlock(&trace_types_lock);
7361 
7362     return ret;
7363 }
7364 
7365 struct ftrace_buffer_info {
7366     struct trace_iterator   iter;
7367     void            *spare;
7368     unsigned int        spare_cpu;
7369     unsigned int        read;
7370 };
7371 
7372 #ifdef CONFIG_TRACER_SNAPSHOT
7373 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7374 {
7375     struct trace_array *tr = inode->i_private;
7376     struct trace_iterator *iter;
7377     struct seq_file *m;
7378     int ret;
7379 
7380     ret = tracing_check_open_get_tr(tr);
7381     if (ret)
7382         return ret;
7383 
7384     if (file->f_mode & FMODE_READ) {
7385         iter = __tracing_open(inode, file, true);
7386         if (IS_ERR(iter))
7387             ret = PTR_ERR(iter);
7388     } else {
7389         /* Writes still need the seq_file to hold the private data */
7390         ret = -ENOMEM;
7391         m = kzalloc(sizeof(*m), GFP_KERNEL);
7392         if (!m)
7393             goto out;
7394         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7395         if (!iter) {
7396             kfree(m);
7397             goto out;
7398         }
7399         ret = 0;
7400 
7401         iter->tr = tr;
7402         iter->array_buffer = &tr->max_buffer;
7403         iter->cpu_file = tracing_get_cpu(inode);
7404         m->private = iter;
7405         file->private_data = m;
7406     }
7407 out:
7408     if (ret < 0)
7409         trace_array_put(tr);
7410 
7411     return ret;
7412 }
7413 
7414 static ssize_t
7415 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7416                loff_t *ppos)
7417 {
7418     struct seq_file *m = filp->private_data;
7419     struct trace_iterator *iter = m->private;
7420     struct trace_array *tr = iter->tr;
7421     unsigned long val;
7422     int ret;
7423 
7424     ret = tracing_update_buffers();
7425     if (ret < 0)
7426         return ret;
7427 
7428     ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7429     if (ret)
7430         return ret;
7431 
7432     mutex_lock(&trace_types_lock);
7433 
7434     if (tr->current_trace->use_max_tr) {
7435         ret = -EBUSY;
7436         goto out;
7437     }
7438 
7439     arch_spin_lock(&tr->max_lock);
7440     if (tr->cond_snapshot)
7441         ret = -EBUSY;
7442     arch_spin_unlock(&tr->max_lock);
7443     if (ret)
7444         goto out;
7445 
7446     switch (val) {
7447     case 0:
7448         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7449             ret = -EINVAL;
7450             break;
7451         }
7452         if (tr->allocated_snapshot)
7453             free_snapshot(tr);
7454         break;
7455     case 1:
7456 /* Only allow per-cpu swap if the ring buffer supports it */
7457 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7458         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7459             ret = -EINVAL;
7460             break;
7461         }
7462 #endif
7463         if (tr->allocated_snapshot)
7464             ret = resize_buffer_duplicate_size(&tr->max_buffer,
7465                     &tr->array_buffer, iter->cpu_file);
7466         else
7467             ret = tracing_alloc_snapshot_instance(tr);
7468         if (ret < 0)
7469             break;
7470         local_irq_disable();
7471         /* Now, we're going to swap */
7472         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7473             update_max_tr(tr, current, smp_processor_id(), NULL);
7474         else
7475             update_max_tr_single(tr, current, iter->cpu_file);
7476         local_irq_enable();
7477         break;
7478     default:
7479         if (tr->allocated_snapshot) {
7480             if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7481                 tracing_reset_online_cpus(&tr->max_buffer);
7482             else
7483                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7484         }
7485         break;
7486     }
7487 
7488     if (ret >= 0) {
7489         *ppos += cnt;
7490         ret = cnt;
7491     }
7492 out:
7493     mutex_unlock(&trace_types_lock);
7494     return ret;
7495 }
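     /*
      * Usage note (illustrative, assuming tracefs is mounted at
      * /sys/kernel/tracing), matching the switch above: writing 0 frees
      * the snapshot buffer, 1 allocates it if needed and takes a
      * snapshot, and any other value clears the snapshot contents:
      *
      *   # echo 1 > /sys/kernel/tracing/snapshot
      *   # cat /sys/kernel/tracing/snapshot
      */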
7496 
7497 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7498 {
7499     struct seq_file *m = file->private_data;
7500     int ret;
7501 
7502     ret = tracing_release(inode, file);
7503 
7504     if (file->f_mode & FMODE_READ)
7505         return ret;
7506 
7507     /* If write only, the seq_file is just a stub */
7508     if (m)
7509         kfree(m->private);
7510     kfree(m);
7511 
7512     return 0;
7513 }
7514 
7515 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7516 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7517                     size_t count, loff_t *ppos);
7518 static int tracing_buffers_release(struct inode *inode, struct file *file);
7519 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7520            struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7521 
7522 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7523 {
7524     struct ftrace_buffer_info *info;
7525     int ret;
7526 
7527     /* The following checks for tracefs lockdown */
7528     ret = tracing_buffers_open(inode, filp);
7529     if (ret < 0)
7530         return ret;
7531 
7532     info = filp->private_data;
7533 
7534     if (info->iter.trace->use_max_tr) {
7535         tracing_buffers_release(inode, filp);
7536         return -EBUSY;
7537     }
7538 
7539     info->iter.snapshot = true;
7540     info->iter.array_buffer = &info->iter.tr->max_buffer;
7541 
7542     return ret;
7543 }
7544 
7545 #endif /* CONFIG_TRACER_SNAPSHOT */
7546 
7547 
7548 static const struct file_operations tracing_thresh_fops = {
7549     .open       = tracing_open_generic,
7550     .read       = tracing_thresh_read,
7551     .write      = tracing_thresh_write,
7552     .llseek     = generic_file_llseek,
7553 };
7554 
7555 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7556 static const struct file_operations tracing_max_lat_fops = {
7557     .open       = tracing_open_generic,
7558     .read       = tracing_max_lat_read,
7559     .write      = tracing_max_lat_write,
7560     .llseek     = generic_file_llseek,
7561 };
7562 #endif
7563 
7564 static const struct file_operations set_tracer_fops = {
7565     .open       = tracing_open_generic,
7566     .read       = tracing_set_trace_read,
7567     .write      = tracing_set_trace_write,
7568     .llseek     = generic_file_llseek,
7569 };
7570 
7571 static const struct file_operations tracing_pipe_fops = {
7572     .open       = tracing_open_pipe,
7573     .poll       = tracing_poll_pipe,
7574     .read       = tracing_read_pipe,
7575     .splice_read    = tracing_splice_read_pipe,
7576     .release    = tracing_release_pipe,
7577     .llseek     = no_llseek,
7578 };
7579 
7580 static const struct file_operations tracing_entries_fops = {
7581     .open       = tracing_open_generic_tr,
7582     .read       = tracing_entries_read,
7583     .write      = tracing_entries_write,
7584     .llseek     = generic_file_llseek,
7585     .release    = tracing_release_generic_tr,
7586 };
7587 
7588 static const struct file_operations tracing_total_entries_fops = {
7589     .open       = tracing_open_generic_tr,
7590     .read       = tracing_total_entries_read,
7591     .llseek     = generic_file_llseek,
7592     .release    = tracing_release_generic_tr,
7593 };
7594 
7595 static const struct file_operations tracing_free_buffer_fops = {
7596     .open       = tracing_open_generic_tr,
7597     .write      = tracing_free_buffer_write,
7598     .release    = tracing_free_buffer_release,
7599 };
7600 
7601 static const struct file_operations tracing_mark_fops = {
7602     .open       = tracing_mark_open,
7603     .write      = tracing_mark_write,
7604     .release    = tracing_release_generic_tr,
7605 };
7606 
7607 static const struct file_operations tracing_mark_raw_fops = {
7608     .open       = tracing_mark_open,
7609     .write      = tracing_mark_raw_write,
7610     .release    = tracing_release_generic_tr,
7611 };
7612 
7613 static const struct file_operations trace_clock_fops = {
7614     .open       = tracing_clock_open,
7615     .read       = seq_read,
7616     .llseek     = seq_lseek,
7617     .release    = tracing_single_release_tr,
7618     .write      = tracing_clock_write,
7619 };
7620 
7621 static const struct file_operations trace_time_stamp_mode_fops = {
7622     .open       = tracing_time_stamp_mode_open,
7623     .read       = seq_read,
7624     .llseek     = seq_lseek,
7625     .release    = tracing_single_release_tr,
7626 };
7627 
7628 #ifdef CONFIG_TRACER_SNAPSHOT
7629 static const struct file_operations snapshot_fops = {
7630     .open       = tracing_snapshot_open,
7631     .read       = seq_read,
7632     .write      = tracing_snapshot_write,
7633     .llseek     = tracing_lseek,
7634     .release    = tracing_snapshot_release,
7635 };
7636 
7637 static const struct file_operations snapshot_raw_fops = {
7638     .open       = snapshot_raw_open,
7639     .read       = tracing_buffers_read,
7640     .release    = tracing_buffers_release,
7641     .splice_read    = tracing_buffers_splice_read,
7642     .llseek     = no_llseek,
7643 };
7644 
7645 #endif /* CONFIG_TRACER_SNAPSHOT */
7646 
7647 /*
7648  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7649  * @filp: The active open file structure
7650  * @ubuf: The user space buffer containing the value to write
7651  * @cnt: The number of bytes to write
7652  * @ppos: The current "file" position
7653  *
7654  * This function implements the write interface for a struct trace_min_max_param.
7655  * The filp->private_data must point to a trace_min_max_param structure that
7656  * defines where to write the value, the min and the max acceptable values,
7657  * and a lock to protect the write.
7658  */
7659 static ssize_t
7660 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7661 {
7662     struct trace_min_max_param *param = filp->private_data;
7663     u64 val;
7664     int err;
7665 
7666     if (!param)
7667         return -EFAULT;
7668 
7669     err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7670     if (err)
7671         return err;
7672 
7673     if (param->lock)
7674         mutex_lock(param->lock);
7675 
7676     if (param->min && val < *param->min)
7677         err = -EINVAL;
7678 
7679     if (param->max && val > *param->max)
7680         err = -EINVAL;
7681 
7682     if (!err)
7683         *param->val = val;
7684 
7685     if (param->lock)
7686         mutex_unlock(param->lock);
7687 
7688     if (err)
7689         return err;
7690 
7691     return cnt;
7692 }
7693 
7694 /*
7695  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7696  * @filp: The active open file structure
7697  * @ubuf: The userspace provided buffer to read value into
7698  * @cnt: The maximum number of bytes to read
7699  * @ppos: The current "file" position
7700  *
7701  * This function implements the read interface for a struct trace_min_max_param.
7702  * The filp->private_data must point to a trace_min_max_param struct with valid
7703  * data.
7704  */
7705 static ssize_t
7706 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7707 {
7708     struct trace_min_max_param *param = filp->private_data;
7709     char buf[U64_STR_SIZE];
7710     int len;
7711     u64 val;
7712 
7713     if (!param)
7714         return -EFAULT;
7715 
7716     val = *param->val;
7717 
7718     if (cnt > sizeof(buf))
7719         cnt = sizeof(buf);
7720 
7721     len = snprintf(buf, sizeof(buf), "%llu\n", val);
7722 
7723     return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7724 }
7725 
7726 const struct file_operations trace_min_max_fops = {
7727     .open       = tracing_open_generic,
7728     .read       = trace_min_max_read,
7729     .write      = trace_min_max_write,
7730 };
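     /*
      * Illustrative sketch (not part of this file): a hypothetical user of
      * trace_min_max_fops.  The field names follow the uses of struct
      * trace_min_max_param above (val/min/max/lock, assumed to be declared
      * in trace.h); "sample_period", its limits and the parent dentry are
      * made up for the example:
      *
      *   static u64 sample_period = 1000;
      *   static u64 period_min = 100, period_max = 10000;
      *   static DEFINE_MUTEX(period_lock);
      *
      *   static struct trace_min_max_param period_param = {
      *           .lock = &period_lock,
      *           .val  = &sample_period,
      *           .min  = &period_min,
      *           .max  = &period_max,
      *   };
      *
      *   trace_create_file("period_us", TRACE_MODE_WRITE, parent,
      *                     &period_param, &trace_min_max_fops);
      */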
7731 
7732 #define TRACING_LOG_ERRS_MAX    8
7733 #define TRACING_LOG_LOC_MAX 128
7734 
7735 #define CMD_PREFIX "  Command: "
7736 
7737 struct err_info {
7738     const char  **errs; /* ptr to loc-specific array of err strings */
7739     u8      type;   /* index into errs -> specific err string */
7740     u16     pos;    /* caret position */
7741     u64     ts;
7742 };
7743 
7744 struct tracing_log_err {
7745     struct list_head    list;
7746     struct err_info     info;
7747     char            loc[TRACING_LOG_LOC_MAX]; /* err location */
7748     char            *cmd;                     /* what caused err */
7749 };
7750 
7751 static DEFINE_MUTEX(tracing_err_log_lock);
7752 
7753 static struct tracing_log_err *alloc_tracing_log_err(int len)
7754 {
7755     struct tracing_log_err *err;
7756 
7757     err = kzalloc(sizeof(*err), GFP_KERNEL);
7758     if (!err)
7759         return ERR_PTR(-ENOMEM);
7760 
7761     err->cmd = kzalloc(len, GFP_KERNEL);
7762     if (!err->cmd) {
7763         kfree(err);
7764         return ERR_PTR(-ENOMEM);
7765     }
7766 
7767     return err;
7768 }
7769 
7770 static void free_tracing_log_err(struct tracing_log_err *err)
7771 {
7772     kfree(err->cmd);
7773     kfree(err);
7774 }
7775 
7776 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7777                            int len)
7778 {
7779     struct tracing_log_err *err;
7780 
7781     if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7782         err = alloc_tracing_log_err(len);
7783         if (PTR_ERR(err) != -ENOMEM)
7784             tr->n_err_log_entries++;
7785 
7786         return err;
7787     }
7788 
7789     err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7790     kfree(err->cmd);
7791     err->cmd = kzalloc(len, GFP_KERNEL);
7792     if (!err->cmd)
7793         return ERR_PTR(-ENOMEM);
7794     list_del(&err->list);
7795 
7796     return err;
7797 }
7798 
7799 /**
7800  * err_pos - find the position of a string within a command for error careting
7801  * @cmd: The tracing command that caused the error
7802  * @str: The string to position the caret at within @cmd
7803  *
7804  * Finds the position of the first occurrence of @str within @cmd.  The
7805  * return value can be passed to tracing_log_err() for caret placement
7806  * within @cmd.
7807  *
7808  * Returns the index within @cmd of the first occurrence of @str or 0
7809  * if @str was not found.
7810  */
7811 unsigned int err_pos(char *cmd, const char *str)
7812 {
7813     char *found;
7814 
7815     if (WARN_ON(!strlen(cmd)))
7816         return 0;
7817 
7818     found = strstr(cmd, str);
7819     if (found)
7820         return found - cmd;
7821 
7822     return 0;
7823 }
7824 
7825 /**
7826  * tracing_log_err - write an error to the tracing error log
7827  * @tr: The associated trace array for the error (NULL for top level array)
7828  * @loc: A string describing where the error occurred
7829  * @cmd: The tracing command that caused the error
7830  * @errs: The array of loc-specific static error strings
7831  * @type: The index into errs[], which produces the specific static err string
7832  * @pos: The position the caret should be placed in the cmd
7833  *
7834  * Writes an error into tracing/error_log of the form:
7835  *
7836  * <loc>: error: <text>
7837  *   Command: <cmd>
7838  *              ^
7839  *
7840  * tracing/error_log is a small log file containing the last
7841  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7842  * unless there has been a tracing error, and the error log can be
7843  * cleared and have its memory freed by writing the empty string in
7844  * truncation mode to it, i.e. echo > tracing/error_log.
7845  *
7846  * NOTE: the @errs array along with the @type param are used to
7847  * produce a static error string - this string is not copied and saved
7848  * when the error is logged - only a pointer to it is saved.  See
7849  * existing callers for examples of how static strings are typically
7850  * defined for use with tracing_log_err().
7851  */
7852 void tracing_log_err(struct trace_array *tr,
7853              const char *loc, const char *cmd,
7854              const char **errs, u8 type, u16 pos)
7855 {
7856     struct tracing_log_err *err;
7857     int len = 0;
7858 
7859     if (!tr)
7860         tr = &global_trace;
7861 
7862     len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7863 
7864     mutex_lock(&tracing_err_log_lock);
7865     err = get_tracing_log_err(tr, len);
7866     if (PTR_ERR(err) == -ENOMEM) {
7867         mutex_unlock(&tracing_err_log_lock);
7868         return;
7869     }
7870 
7871     snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7872     snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7873 
7874     err->info.errs = errs;
7875     err->info.type = type;
7876     err->info.pos = pos;
7877     err->info.ts = local_clock();
7878 
7879     list_add_tail(&err->list, &tr->err_log);
7880     mutex_unlock(&tracing_err_log_lock);
7881 }
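     /*
      * Illustrative caller sketch (hypothetical names): the errs[] array
      * and the type index are defined by the caller, and err_pos() above
      * supplies the caret position within the offending command:
      *
      *   static const char *my_errs[] = { "Field not found", "Bad operator" };
      *
      *   tracing_log_err(tr, "hist:my_trigger", cmd, my_errs,
      *                   0, err_pos(cmd, "bad_field"));
      */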
7882 
7883 static void clear_tracing_err_log(struct trace_array *tr)
7884 {
7885     struct tracing_log_err *err, *next;
7886 
7887     mutex_lock(&tracing_err_log_lock);
7888     list_for_each_entry_safe(err, next, &tr->err_log, list) {
7889         list_del(&err->list);
7890         free_tracing_log_err(err);
7891     }
7892 
7893     tr->n_err_log_entries = 0;
7894     mutex_unlock(&tracing_err_log_lock);
7895 }
7896 
7897 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7898 {
7899     struct trace_array *tr = m->private;
7900 
7901     mutex_lock(&tracing_err_log_lock);
7902 
7903     return seq_list_start(&tr->err_log, *pos);
7904 }
7905 
7906 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7907 {
7908     struct trace_array *tr = m->private;
7909 
7910     return seq_list_next(v, &tr->err_log, pos);
7911 }
7912 
7913 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7914 {
7915     mutex_unlock(&tracing_err_log_lock);
7916 }
7917 
7918 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7919 {
7920     u16 i;
7921 
7922     for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7923         seq_putc(m, ' ');
7924     for (i = 0; i < pos; i++)
7925         seq_putc(m, ' ');
7926     seq_puts(m, "^\n");
7927 }
7928 
7929 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7930 {
7931     struct tracing_log_err *err = v;
7932 
7933     if (err) {
7934         const char *err_text = err->info.errs[err->info.type];
7935         u64 sec = err->info.ts;
7936         u32 nsec;
7937 
7938         nsec = do_div(sec, NSEC_PER_SEC);
7939         seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7940                err->loc, err_text);
7941         seq_printf(m, "%s", err->cmd);
7942         tracing_err_log_show_pos(m, err->info.pos);
7943     }
7944 
7945     return 0;
7946 }
7947 
7948 static const struct seq_operations tracing_err_log_seq_ops = {
7949     .start  = tracing_err_log_seq_start,
7950     .next   = tracing_err_log_seq_next,
7951     .stop   = tracing_err_log_seq_stop,
7952     .show   = tracing_err_log_seq_show
7953 };
7954 
7955 static int tracing_err_log_open(struct inode *inode, struct file *file)
7956 {
7957     struct trace_array *tr = inode->i_private;
7958     int ret = 0;
7959 
7960     ret = tracing_check_open_get_tr(tr);
7961     if (ret)
7962         return ret;
7963 
7964     /* If this file was opened for write, then erase contents */
7965     if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7966         clear_tracing_err_log(tr);
7967 
7968     if (file->f_mode & FMODE_READ) {
7969         ret = seq_open(file, &tracing_err_log_seq_ops);
7970         if (!ret) {
7971             struct seq_file *m = file->private_data;
7972             m->private = tr;
7973         } else {
7974             trace_array_put(tr);
7975         }
7976     }
7977     return ret;
7978 }
7979 
7980 static ssize_t tracing_err_log_write(struct file *file,
7981                      const char __user *buffer,
7982                      size_t count, loff_t *ppos)
7983 {
7984     return count;
7985 }
7986 
7987 static int tracing_err_log_release(struct inode *inode, struct file *file)
7988 {
7989     struct trace_array *tr = inode->i_private;
7990 
7991     trace_array_put(tr);
7992 
7993     if (file->f_mode & FMODE_READ)
7994         seq_release(inode, file);
7995 
7996     return 0;
7997 }
7998 
7999 static const struct file_operations tracing_err_log_fops = {
8000     .open       = tracing_err_log_open,
8001     .write      = tracing_err_log_write,
8002     .read       = seq_read,
8003     .llseek     = seq_lseek,
8004     .release    = tracing_err_log_release,
8005 };
8006 
8007 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8008 {
8009     struct trace_array *tr = inode->i_private;
8010     struct ftrace_buffer_info *info;
8011     int ret;
8012 
8013     ret = tracing_check_open_get_tr(tr);
8014     if (ret)
8015         return ret;
8016 
8017     info = kvzalloc(sizeof(*info), GFP_KERNEL);
8018     if (!info) {
8019         trace_array_put(tr);
8020         return -ENOMEM;
8021     }
8022 
8023     mutex_lock(&trace_types_lock);
8024 
8025     info->iter.tr       = tr;
8026     info->iter.cpu_file = tracing_get_cpu(inode);
8027     info->iter.trace    = tr->current_trace;
8028     info->iter.array_buffer = &tr->array_buffer;
8029     info->spare     = NULL;
8030     /* Force reading ring buffer for first read */
8031     info->read      = (unsigned int)-1;
8032 
8033     filp->private_data = info;
8034 
8035     tr->trace_ref++;
8036 
8037     mutex_unlock(&trace_types_lock);
8038 
8039     ret = nonseekable_open(inode, filp);
8040     if (ret < 0)
8041         trace_array_put(tr);
8042 
8043     return ret;
8044 }
8045 
8046 static __poll_t
8047 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8048 {
8049     struct ftrace_buffer_info *info = filp->private_data;
8050     struct trace_iterator *iter = &info->iter;
8051 
8052     return trace_poll(iter, filp, poll_table);
8053 }
8054 
8055 static ssize_t
8056 tracing_buffers_read(struct file *filp, char __user *ubuf,
8057              size_t count, loff_t *ppos)
8058 {
8059     struct ftrace_buffer_info *info = filp->private_data;
8060     struct trace_iterator *iter = &info->iter;
8061     ssize_t ret = 0;
8062     ssize_t size;
8063 
8064     if (!count)
8065         return 0;
8066 
8067 #ifdef CONFIG_TRACER_MAX_TRACE
8068     if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8069         return -EBUSY;
8070 #endif
8071 
8072     if (!info->spare) {
8073         info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8074                               iter->cpu_file);
8075         if (IS_ERR(info->spare)) {
8076             ret = PTR_ERR(info->spare);
8077             info->spare = NULL;
8078         } else {
8079             info->spare_cpu = iter->cpu_file;
8080         }
8081     }
8082     if (!info->spare)
8083         return ret;
8084 
8085     /* Do we have previous read data to read? */
8086     if (info->read < PAGE_SIZE)
8087         goto read;
8088 
8089  again:
8090     trace_access_lock(iter->cpu_file);
8091     ret = ring_buffer_read_page(iter->array_buffer->buffer,
8092                     &info->spare,
8093                     count,
8094                     iter->cpu_file, 0);
8095     trace_access_unlock(iter->cpu_file);
8096 
8097     if (ret < 0) {
8098         if (trace_empty(iter)) {
8099             if ((filp->f_flags & O_NONBLOCK))
8100                 return -EAGAIN;
8101 
8102             ret = wait_on_pipe(iter, 0);
8103             if (ret)
8104                 return ret;
8105 
8106             goto again;
8107         }
8108         return 0;
8109     }
8110 
8111     info->read = 0;
8112  read:
8113     size = PAGE_SIZE - info->read;
8114     if (size > count)
8115         size = count;
8116 
8117     ret = copy_to_user(ubuf, info->spare + info->read, size);
8118     if (ret == size)
8119         return -EFAULT;
8120 
8121     size -= ret;
8122 
8123     *ppos += size;
8124     info->read += size;
8125 
8126     return size;
8127 }
8128 
8129 static int tracing_buffers_release(struct inode *inode, struct file *file)
8130 {
8131     struct ftrace_buffer_info *info = file->private_data;
8132     struct trace_iterator *iter = &info->iter;
8133 
8134     mutex_lock(&trace_types_lock);
8135 
8136     iter->tr->trace_ref--;
8137 
8138     __trace_array_put(iter->tr);
8139 
8140     if (info->spare)
8141         ring_buffer_free_read_page(iter->array_buffer->buffer,
8142                        info->spare_cpu, info->spare);
8143     kvfree(info);
8144 
8145     mutex_unlock(&trace_types_lock);
8146 
8147     return 0;
8148 }
8149 
8150 struct buffer_ref {
8151     struct trace_buffer *buffer;
8152     void            *page;
8153     int         cpu;
8154     refcount_t      refcount;
8155 };
8156 
8157 static void buffer_ref_release(struct buffer_ref *ref)
8158 {
8159     if (!refcount_dec_and_test(&ref->refcount))
8160         return;
8161     ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8162     kfree(ref);
8163 }
8164 
8165 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8166                     struct pipe_buffer *buf)
8167 {
8168     struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8169 
8170     buffer_ref_release(ref);
8171     buf->private = 0;
8172 }
8173 
8174 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8175                 struct pipe_buffer *buf)
8176 {
8177     struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8178 
8179     if (refcount_read(&ref->refcount) > INT_MAX/2)
8180         return false;
8181 
8182     refcount_inc(&ref->refcount);
8183     return true;
8184 }
8185 
8186 /* Pipe buffer operations for a buffer. */
8187 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8188     .release        = buffer_pipe_buf_release,
8189     .get            = buffer_pipe_buf_get,
8190 };
8191 
8192 /*
8193  * Callback from splice_to_pipe(); releases any remaining pages in the
8194  * spd in case we errored out while filling the pipe.
8195  */
8196 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8197 {
8198     struct buffer_ref *ref =
8199         (struct buffer_ref *)spd->partial[i].private;
8200 
8201     buffer_ref_release(ref);
8202     spd->partial[i].private = 0;
8203 }
8204 
8205 static ssize_t
8206 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8207                 struct pipe_inode_info *pipe, size_t len,
8208                 unsigned int flags)
8209 {
8210     struct ftrace_buffer_info *info = file->private_data;
8211     struct trace_iterator *iter = &info->iter;
8212     struct partial_page partial_def[PIPE_DEF_BUFFERS];
8213     struct page *pages_def[PIPE_DEF_BUFFERS];
8214     struct splice_pipe_desc spd = {
8215         .pages      = pages_def,
8216         .partial    = partial_def,
8217         .nr_pages_max   = PIPE_DEF_BUFFERS,
8218         .ops        = &buffer_pipe_buf_ops,
8219         .spd_release    = buffer_spd_release,
8220     };
8221     struct buffer_ref *ref;
8222     int entries, i;
8223     ssize_t ret = 0;
8224 
8225 #ifdef CONFIG_TRACER_MAX_TRACE
8226     if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8227         return -EBUSY;
8228 #endif
8229 
8230     if (*ppos & (PAGE_SIZE - 1))
8231         return -EINVAL;
8232 
8233     if (len & (PAGE_SIZE - 1)) {
8234         if (len < PAGE_SIZE)
8235             return -EINVAL;
8236         len &= PAGE_MASK;
8237     }
8238 
8239     if (splice_grow_spd(pipe, &spd))
8240         return -ENOMEM;
8241 
8242  again:
8243     trace_access_lock(iter->cpu_file);
8244     entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8245 
8246     for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8247         struct page *page;
8248         int r;
8249 
8250         ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8251         if (!ref) {
8252             ret = -ENOMEM;
8253             break;
8254         }
8255 
8256         refcount_set(&ref->refcount, 1);
8257         ref->buffer = iter->array_buffer->buffer;
8258         ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8259         if (IS_ERR(ref->page)) {
8260             ret = PTR_ERR(ref->page);
8261             ref->page = NULL;
8262             kfree(ref);
8263             break;
8264         }
8265         ref->cpu = iter->cpu_file;
8266 
8267         r = ring_buffer_read_page(ref->buffer, &ref->page,
8268                       len, iter->cpu_file, 1);
8269         if (r < 0) {
8270             ring_buffer_free_read_page(ref->buffer, ref->cpu,
8271                            ref->page);
8272             kfree(ref);
8273             break;
8274         }
8275 
8276         page = virt_to_page(ref->page);
8277 
8278         spd.pages[i] = page;
8279         spd.partial[i].len = PAGE_SIZE;
8280         spd.partial[i].offset = 0;
8281         spd.partial[i].private = (unsigned long)ref;
8282         spd.nr_pages++;
8283         *ppos += PAGE_SIZE;
8284 
8285         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8286     }
8287 
8288     trace_access_unlock(iter->cpu_file);
8289     spd.nr_pages = i;
8290 
8291     /* did we read anything? */
8292     if (!spd.nr_pages) {
8293         if (ret)
8294             goto out;
8295 
8296         ret = -EAGAIN;
8297         if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8298             goto out;
8299 
8300         ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8301         if (ret)
8302             goto out;
8303 
8304         goto again;
8305     }
8306 
8307     ret = splice_to_pipe(pipe, &spd);
8308 out:
8309     splice_shrink_spd(&spd);
8310 
8311     return ret;
8312 }
8313 
8314 static const struct file_operations tracing_buffers_fops = {
8315     .open       = tracing_buffers_open,
8316     .read       = tracing_buffers_read,
8317     .poll       = tracing_buffers_poll,
8318     .release    = tracing_buffers_release,
8319     .splice_read    = tracing_buffers_splice_read,
8320     .llseek     = no_llseek,
8321 };
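     /*
      * Usage note (illustrative): trace_pipe_raw hands out whole
      * ring-buffer pages, so splice requests must be page-sized and the
      * offset page-aligned, as checked above; splicing
      * per_cpu/cpuN/trace_pipe_raw into a pipe or file avoids copying the
      * data through user space, which is how tools such as trace-cmd
      * record binary trace data.
      */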
8322 
8323 static ssize_t
8324 tracing_stats_read(struct file *filp, char __user *ubuf,
8325            size_t count, loff_t *ppos)
8326 {
8327     struct inode *inode = file_inode(filp);
8328     struct trace_array *tr = inode->i_private;
8329     struct array_buffer *trace_buf = &tr->array_buffer;
8330     int cpu = tracing_get_cpu(inode);
8331     struct trace_seq *s;
8332     unsigned long cnt;
8333     unsigned long long t;
8334     unsigned long usec_rem;
8335 
8336     s = kmalloc(sizeof(*s), GFP_KERNEL);
8337     if (!s)
8338         return -ENOMEM;
8339 
8340     trace_seq_init(s);
8341 
8342     cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8343     trace_seq_printf(s, "entries: %ld\n", cnt);
8344 
8345     cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8346     trace_seq_printf(s, "overrun: %ld\n", cnt);
8347 
8348     cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8349     trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8350 
8351     cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8352     trace_seq_printf(s, "bytes: %ld\n", cnt);
8353 
8354     if (trace_clocks[tr->clock_id].in_ns) {
8355         /* local or global for trace_clock */
8356         t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8357         usec_rem = do_div(t, USEC_PER_SEC);
8358         trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8359                                 t, usec_rem);
8360 
8361         t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8362         usec_rem = do_div(t, USEC_PER_SEC);
8363         trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8364     } else {
8365         /* counter or tsc mode for trace_clock */
8366         trace_seq_printf(s, "oldest event ts: %llu\n",
8367                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8368 
8369         trace_seq_printf(s, "now ts: %llu\n",
8370                 ring_buffer_time_stamp(trace_buf->buffer));
8371     }
8372 
8373     cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8374     trace_seq_printf(s, "dropped events: %ld\n", cnt);
8375 
8376     cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8377     trace_seq_printf(s, "read events: %ld\n", cnt);
8378 
8379     count = simple_read_from_buffer(ubuf, count, ppos,
8380                     s->buffer, trace_seq_used(s));
8381 
8382     kfree(s);
8383 
8384     return count;
8385 }
8386 
8387 static const struct file_operations tracing_stats_fops = {
8388     .open       = tracing_open_generic_tr,
8389     .read       = tracing_stats_read,
8390     .llseek     = generic_file_llseek,
8391     .release    = tracing_release_generic_tr,
8392 };
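     /*
      * Usage note (illustrative, assuming tracefs is mounted at
      * /sys/kernel/tracing): the per-CPU counters printed above can be
      * read from the per_cpu directory:
      *
      *   # cat /sys/kernel/tracing/per_cpu/cpu0/stats
      */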
8393 
8394 #ifdef CONFIG_DYNAMIC_FTRACE
8395 
8396 static ssize_t
8397 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8398           size_t cnt, loff_t *ppos)
8399 {
8400     ssize_t ret;
8401     char *buf;
8402     int r;
8403 
8404     /* 256 should be plenty to hold the amount needed */
8405     buf = kmalloc(256, GFP_KERNEL);
8406     if (!buf)
8407         return -ENOMEM;
8408 
8409     r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8410               ftrace_update_tot_cnt,
8411               ftrace_number_of_pages,
8412               ftrace_number_of_groups);
8413 
8414     ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8415     kfree(buf);
8416     return ret;
8417 }
8418 
8419 static const struct file_operations tracing_dyn_info_fops = {
8420     .open       = tracing_open_generic,
8421     .read       = tracing_read_dyn_info,
8422     .llseek     = generic_file_llseek,
8423 };
8424 #endif /* CONFIG_DYNAMIC_FTRACE */
8425 
8426 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8427 static void
8428 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8429         struct trace_array *tr, struct ftrace_probe_ops *ops,
8430         void *data)
8431 {
8432     tracing_snapshot_instance(tr);
8433 }
8434 
8435 static void
8436 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8437               struct trace_array *tr, struct ftrace_probe_ops *ops,
8438               void *data)
8439 {
8440     struct ftrace_func_mapper *mapper = data;
8441     long *count = NULL;
8442 
8443     if (mapper)
8444         count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8445 
8446     if (count) {
8447 
8448         if (*count <= 0)
8449             return;
8450 
8451         (*count)--;
8452     }
8453 
8454     tracing_snapshot_instance(tr);
8455 }
8456 
8457 static int
8458 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8459               struct ftrace_probe_ops *ops, void *data)
8460 {
8461     struct ftrace_func_mapper *mapper = data;
8462     long *count = NULL;
8463 
8464     seq_printf(m, "%ps:", (void *)ip);
8465 
8466     seq_puts(m, "snapshot");
8467 
8468     if (mapper)
8469         count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8470 
8471     if (count)
8472         seq_printf(m, ":count=%ld\n", *count);
8473     else
8474         seq_puts(m, ":unlimited\n");
8475 
8476     return 0;
8477 }
8478 
8479 static int
8480 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8481              unsigned long ip, void *init_data, void **data)
8482 {
8483     struct ftrace_func_mapper *mapper = *data;
8484 
8485     if (!mapper) {
8486         mapper = allocate_ftrace_func_mapper();
8487         if (!mapper)
8488             return -ENOMEM;
8489         *data = mapper;
8490     }
8491 
8492     return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8493 }
8494 
8495 static void
8496 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8497              unsigned long ip, void *data)
8498 {
8499     struct ftrace_func_mapper *mapper = data;
8500 
8501     if (!ip) {
8502         if (!mapper)
8503             return;
8504         free_ftrace_func_mapper(mapper, NULL);
8505         return;
8506     }
8507 
8508     ftrace_func_mapper_remove_ip(mapper, ip);
8509 }
8510 
8511 static struct ftrace_probe_ops snapshot_probe_ops = {
8512     .func           = ftrace_snapshot,
8513     .print          = ftrace_snapshot_print,
8514 };
8515 
8516 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8517     .func           = ftrace_count_snapshot,
8518     .print          = ftrace_snapshot_print,
8519     .init           = ftrace_snapshot_init,
8520     .free           = ftrace_snapshot_free,
8521 };
8522 
8523 static int
8524 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8525                    char *glob, char *cmd, char *param, int enable)
8526 {
8527     struct ftrace_probe_ops *ops;
8528     void *count = (void *)-1;
8529     char *number;
8530     int ret;
8531 
8532     if (!tr)
8533         return -ENODEV;
8534 
8535     /* hash funcs only work with set_ftrace_filter */
8536     if (!enable)
8537         return -EINVAL;
8538 
8539     ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8540 
8541     if (glob[0] == '!')
8542         return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8543 
8544     if (!param)
8545         goto out_reg;
8546 
8547     number = strsep(&param, ":");
8548 
8549     if (!strlen(number))
8550         goto out_reg;
8551 
8552     /*
8553      * We use the callback data field (which is a pointer)
8554      * as our counter.
8555      */
8556     ret = kstrtoul(number, 0, (unsigned long *)&count);
8557     if (ret)
8558         return ret;
8559 
8560  out_reg:
8561     ret = tracing_alloc_snapshot_instance(tr);
8562     if (ret < 0)
8563         goto out;
8564 
8565     ret = register_ftrace_function_probe(glob, tr, ops, count);
8566 
8567  out:
8568     return ret < 0 ? ret : 0;
8569 }
8570 
8571 static struct ftrace_func_command ftrace_snapshot_cmd = {
8572     .name           = "snapshot",
8573     .func           = ftrace_trace_snapshot_callback,
8574 };
8575 
8576 static __init int register_snapshot_cmd(void)
8577 {
8578     return register_ftrace_command(&ftrace_snapshot_cmd);
8579 }
8580 #else
8581 static inline __init int register_snapshot_cmd(void) { return 0; }
8582 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
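     /*
      * Usage note (illustrative, assuming tracefs is mounted at
      * /sys/kernel/tracing): the "snapshot" command registered above is
      * attached to functions through set_ftrace_filter; an optional
      * :count limits how many snapshots are taken, and a leading '!'
      * removes the probe:
      *
      *   # echo 'schedule:snapshot:1' > /sys/kernel/tracing/set_ftrace_filter
      *   # echo '!schedule:snapshot' > /sys/kernel/tracing/set_ftrace_filter
      */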
8583 
8584 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8585 {
8586     if (WARN_ON(!tr->dir))
8587         return ERR_PTR(-ENODEV);
8588 
8589     /* Top directory uses NULL as the parent */
8590     if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8591         return NULL;
8592 
8593     /* All sub buffers have a descriptor */
8594     return tr->dir;
8595 }
8596 
8597 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8598 {
8599     struct dentry *d_tracer;
8600 
8601     if (tr->percpu_dir)
8602         return tr->percpu_dir;
8603 
8604     d_tracer = tracing_get_dentry(tr);
8605     if (IS_ERR(d_tracer))
8606         return NULL;
8607 
8608     tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8609 
8610     MEM_FAIL(!tr->percpu_dir,
8611           "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8612 
8613     return tr->percpu_dir;
8614 }
8615 
8616 static struct dentry *
8617 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8618               void *data, long cpu, const struct file_operations *fops)
8619 {
8620     struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8621 
8622     if (ret) /* See tracing_get_cpu() */
8623         d_inode(ret)->i_cdev = (void *)(cpu + 1);
8624     return ret;
8625 }
8626 
8627 static void
8628 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8629 {
8630     struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8631     struct dentry *d_cpu;
8632     char cpu_dir[30]; /* 30 characters should be more than enough */
8633 
8634     if (!d_percpu)
8635         return;
8636 
8637     snprintf(cpu_dir, 30, "cpu%ld", cpu);
8638     d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8639     if (!d_cpu) {
8640         pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8641         return;
8642     }
8643 
8644     /* per cpu trace_pipe */
8645     trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8646                 tr, cpu, &tracing_pipe_fops);
8647 
8648     /* per cpu trace */
8649     trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8650                 tr, cpu, &tracing_fops);
8651 
8652     trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8653                 tr, cpu, &tracing_buffers_fops);
8654 
8655     trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8656                 tr, cpu, &tracing_stats_fops);
8657 
8658     trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8659                 tr, cpu, &tracing_entries_fops);
8660 
8661 #ifdef CONFIG_TRACER_SNAPSHOT
8662     trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8663                 tr, cpu, &snapshot_fops);
8664 
8665     trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8666                 tr, cpu, &snapshot_raw_fops);
8667 #endif
8668 }
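
/*
 * The result, for each possible CPU, is a per-instance directory tree along
 * the lines of the sketch below; whether "snapshot"/"snapshot_raw" appear
 * depends on CONFIG_TRACER_SNAPSHOT:
 *
 *   per_cpu/cpu0/trace
 *   per_cpu/cpu0/trace_pipe
 *   per_cpu/cpu0/trace_pipe_raw
 *   per_cpu/cpu0/stats
 *   per_cpu/cpu0/buffer_size_kb
 *   per_cpu/cpu0/snapshot
 *   per_cpu/cpu0/snapshot_raw
 */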
8669 
8670 #ifdef CONFIG_FTRACE_SELFTEST
8671 /* Let selftest have access to static functions in this file */
8672 #include "trace_selftest.c"
8673 #endif
8674 
8675 static ssize_t
8676 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8677             loff_t *ppos)
8678 {
8679     struct trace_option_dentry *topt = filp->private_data;
8680     char *buf;
8681 
8682     if (topt->flags->val & topt->opt->bit)
8683         buf = "1\n";
8684     else
8685         buf = "0\n";
8686 
8687     return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8688 }
8689 
8690 static ssize_t
8691 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8692              loff_t *ppos)
8693 {
8694     struct trace_option_dentry *topt = filp->private_data;
8695     unsigned long val;
8696     int ret;
8697 
8698     ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8699     if (ret)
8700         return ret;
8701 
8702     if (val != 0 && val != 1)
8703         return -EINVAL;
8704 
8705     if (!!(topt->flags->val & topt->opt->bit) != val) {
8706         mutex_lock(&trace_types_lock);
8707         ret = __set_tracer_option(topt->tr, topt->flags,
8708                       topt->opt, !val);
8709         mutex_unlock(&trace_types_lock);
8710         if (ret)
8711             return ret;
8712     }
8713 
8714     *ppos += cnt;
8715 
8716     return cnt;
8717 }
8718 
8719 
8720 static const struct file_operations trace_options_fops = {
8721     .open = tracing_open_generic,
8722     .read = trace_options_read,
8723     .write = trace_options_write,
8724     .llseek = generic_file_llseek,
8725 };
8726 
8727 /*
8728  * In order to pass in both the trace_array descriptor as well as the index
8729  * to the flag that the trace option file represents, the trace_array
8730  * has a character array of trace_flags_index[], which holds the index
8731  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8732  * The address of the corresponding element of this array is passed to
8733  * the flag option file read/write callbacks.
8734  *
8735  * In order to extract both the index and the trace_array descriptor,
8736  * get_tr_index() uses the following algorithm.
8737  *
8738  *   idx = *ptr;
8739  *
8740  * As the pointer passed in is the address of index[idx], dereferencing
8741  * it yields the index value itself (remember index[1] == 1).
8742  *
8743  * Then, to get the trace_array descriptor, subtract that index from
8744  * the pointer to get back to the start of the index array.
8745  *
8746  *   ptr - idx == &index[0]
8747  *
8748  * Then a simple container_of() from that pointer gets us to the
8749  * trace_array descriptor.
8750  */
8751 static void get_tr_index(void *data, struct trace_array **ptr,
8752              unsigned int *pindex)
8753 {
8754     *pindex = *(unsigned char *)data;
8755 
8756     *ptr = container_of(data - *pindex, struct trace_array,
8757                 trace_flags_index);
8758 }
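
/*
 * Worked example with made-up addresses: if trace_flags_index[] happens to
 * start at 0x1000, the option file for flag bit 2 is created with
 * data == &trace_flags_index[2] == 0x1002, so:
 *
 *   *pindex = *(unsigned char *)data;     2, since index[2] == 2
 *   data - *pindex                        0x1000 == &trace_flags_index[0]
 *   container_of(...)                     the enclosing trace_array
 */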
8759 
8760 static ssize_t
8761 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8762             loff_t *ppos)
8763 {
8764     void *tr_index = filp->private_data;
8765     struct trace_array *tr;
8766     unsigned int index;
8767     char *buf;
8768 
8769     get_tr_index(tr_index, &tr, &index);
8770 
8771     if (tr->trace_flags & (1 << index))
8772         buf = "1\n";
8773     else
8774         buf = "0\n";
8775 
8776     return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8777 }
8778 
8779 static ssize_t
8780 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8781              loff_t *ppos)
8782 {
8783     void *tr_index = filp->private_data;
8784     struct trace_array *tr;
8785     unsigned int index;
8786     unsigned long val;
8787     int ret;
8788 
8789     get_tr_index(tr_index, &tr, &index);
8790 
8791     ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8792     if (ret)
8793         return ret;
8794 
8795     if (val != 0 && val != 1)
8796         return -EINVAL;
8797 
8798     mutex_lock(&event_mutex);
8799     mutex_lock(&trace_types_lock);
8800     ret = set_tracer_flag(tr, 1 << index, val);
8801     mutex_unlock(&trace_types_lock);
8802     mutex_unlock(&event_mutex);
8803 
8804     if (ret < 0)
8805         return ret;
8806 
8807     *ppos += cnt;
8808 
8809     return cnt;
8810 }
8811 
8812 static const struct file_operations trace_options_core_fops = {
8813     .open = tracing_open_generic,
8814     .read = trace_options_core_read,
8815     .write = trace_options_core_write,
8816     .llseek = generic_file_llseek,
8817 };
8818 
8819 struct dentry *trace_create_file(const char *name,
8820                  umode_t mode,
8821                  struct dentry *parent,
8822                  void *data,
8823                  const struct file_operations *fops)
8824 {
8825     struct dentry *ret;
8826 
8827     ret = tracefs_create_file(name, mode, parent, data, fops);
8828     if (!ret)
8829         pr_warn("Could not create tracefs '%s' entry\n", name);
8830 
8831     return ret;
8832 }
8833 
8834 
8835 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8836 {
8837     struct dentry *d_tracer;
8838 
8839     if (tr->options)
8840         return tr->options;
8841 
8842     d_tracer = tracing_get_dentry(tr);
8843     if (IS_ERR(d_tracer))
8844         return NULL;
8845 
8846     tr->options = tracefs_create_dir("options", d_tracer);
8847     if (!tr->options) {
8848         pr_warn("Could not create tracefs directory 'options'\n");
8849         return NULL;
8850     }
8851 
8852     return tr->options;
8853 }
8854 
8855 static void
8856 create_trace_option_file(struct trace_array *tr,
8857              struct trace_option_dentry *topt,
8858              struct tracer_flags *flags,
8859              struct tracer_opt *opt)
8860 {
8861     struct dentry *t_options;
8862 
8863     t_options = trace_options_init_dentry(tr);
8864     if (!t_options)
8865         return;
8866 
8867     topt->flags = flags;
8868     topt->opt = opt;
8869     topt->tr = tr;
8870 
8871     topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8872                     t_options, topt, &trace_options_fops);
8873 
8874 }
8875 
8876 static void
8877 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8878 {
8879     struct trace_option_dentry *topts;
8880     struct trace_options *tr_topts;
8881     struct tracer_flags *flags;
8882     struct tracer_opt *opts;
8883     int cnt;
8884     int i;
8885 
8886     if (!tracer)
8887         return;
8888 
8889     flags = tracer->flags;
8890 
8891     if (!flags || !flags->opts)
8892         return;
8893 
8894     /*
8895      * If this is an instance, only create flags for tracers
8896      * the instance may have.
8897      */
8898     if (!trace_ok_for_array(tracer, tr))
8899         return;
8900 
8901     for (i = 0; i < tr->nr_topts; i++) {
8902         /* Make sure there are no duplicate flags. */
8903         if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8904             return;
8905     }
8906 
8907     opts = flags->opts;
8908 
8909     for (cnt = 0; opts[cnt].name; cnt++)
8910         ;
8911 
8912     topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8913     if (!topts)
8914         return;
8915 
8916     tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8917                 GFP_KERNEL);
8918     if (!tr_topts) {
8919         kfree(topts);
8920         return;
8921     }
8922 
8923     tr->topts = tr_topts;
8924     tr->topts[tr->nr_topts].tracer = tracer;
8925     tr->topts[tr->nr_topts].topts = topts;
8926     tr->nr_topts++;
8927 
8928     for (cnt = 0; opts[cnt].name; cnt++) {
8929         create_trace_option_file(tr, &topts[cnt], flags,
8930                      &opts[cnt]);
8931         MEM_FAIL(topts[cnt].entry == NULL,
8932               "Failed to create trace option: %s",
8933               opts[cnt].name);
8934     }
8935 }
8936 
8937 static struct dentry *
8938 create_trace_option_core_file(struct trace_array *tr,
8939                   const char *option, long index)
8940 {
8941     struct dentry *t_options;
8942 
8943     t_options = trace_options_init_dentry(tr);
8944     if (!t_options)
8945         return NULL;
8946 
8947     return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8948                  (void *)&tr->trace_flags_index[index],
8949                  &trace_options_core_fops);
8950 }
8951 
8952 static void create_trace_options_dir(struct trace_array *tr)
8953 {
8954     struct dentry *t_options;
8955     bool top_level = tr == &global_trace;
8956     int i;
8957 
8958     t_options = trace_options_init_dentry(tr);
8959     if (!t_options)
8960         return;
8961 
8962     for (i = 0; trace_options[i]; i++) {
8963         if (top_level ||
8964             !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8965             create_trace_option_core_file(tr, trace_options[i], i);
8966     }
8967 }
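
/*
 * Each core flag ends up as a file under the instance's "options" directory.
 * A sketch of toggling one from user space (the flag name is just an
 * example; the available names depend on the kernel configuration):
 *
 *   cat options/sym-offset         (read the current 0/1 state)
 *   echo 1 > options/sym-offset    (set the flag via trace_options_core_write)
 */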
8968 
8969 static ssize_t
8970 rb_simple_read(struct file *filp, char __user *ubuf,
8971            size_t cnt, loff_t *ppos)
8972 {
8973     struct trace_array *tr = filp->private_data;
8974     char buf[64];
8975     int r;
8976 
8977     r = tracer_tracing_is_on(tr);
8978     r = sprintf(buf, "%d\n", r);
8979 
8980     return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8981 }
8982 
8983 static ssize_t
8984 rb_simple_write(struct file *filp, const char __user *ubuf,
8985         size_t cnt, loff_t *ppos)
8986 {
8987     struct trace_array *tr = filp->private_data;
8988     struct trace_buffer *buffer = tr->array_buffer.buffer;
8989     unsigned long val;
8990     int ret;
8991 
8992     ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8993     if (ret)
8994         return ret;
8995 
8996     if (buffer) {
8997         mutex_lock(&trace_types_lock);
8998         if (!!val == tracer_tracing_is_on(tr)) {
8999             val = 0; /* do nothing */
9000         } else if (val) {
9001             tracer_tracing_on(tr);
9002             if (tr->current_trace->start)
9003                 tr->current_trace->start(tr);
9004         } else {
9005             tracer_tracing_off(tr);
9006             if (tr->current_trace->stop)
9007                 tr->current_trace->stop(tr);
9008         }
9009         mutex_unlock(&trace_types_lock);
9010     }
9011 
9012     (*ppos)++;
9013 
9014     return cnt;
9015 }
9016 
9017 static const struct file_operations rb_simple_fops = {
9018     .open       = tracing_open_generic_tr,
9019     .read       = rb_simple_read,
9020     .write      = rb_simple_write,
9021     .release    = tracing_release_generic_tr,
9022     .llseek     = default_llseek,
9023 };
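
/*
 * rb_simple_fops backs the per-instance "tracing_on" file created in
 * init_tracer_tracefs() below. A sketch of the user-space view:
 *
 *   echo 0 > tracing_on    (stop recording; the buffer contents are kept)
 *   echo 1 > tracing_on    (resume recording)
 *   cat tracing_on         (prints 0 or 1)
 */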
9024 
9025 static ssize_t
9026 buffer_percent_read(struct file *filp, char __user *ubuf,
9027             size_t cnt, loff_t *ppos)
9028 {
9029     struct trace_array *tr = filp->private_data;
9030     char buf[64];
9031     int r;
9032 
9033     r = tr->buffer_percent;
9034     r = sprintf(buf, "%d\n", r);
9035 
9036     return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9037 }
9038 
9039 static ssize_t
9040 buffer_percent_write(struct file *filp, const char __user *ubuf,
9041              size_t cnt, loff_t *ppos)
9042 {
9043     struct trace_array *tr = filp->private_data;
9044     unsigned long val;
9045     int ret;
9046 
9047     ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9048     if (ret)
9049         return ret;
9050 
9051     if (val > 100)
9052         return -EINVAL;
9053 
9054     if (!val)
9055         val = 1;
9056 
9057     tr->buffer_percent = val;
9058 
9059     (*ppos)++;
9060 
9061     return cnt;
9062 }
9063 
9064 static const struct file_operations buffer_percent_fops = {
9065     .open       = tracing_open_generic_tr,
9066     .read       = buffer_percent_read,
9067     .write      = buffer_percent_write,
9068     .release    = tracing_release_generic_tr,
9069     .llseek     = default_llseek,
9070 };
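
/*
 * buffer_percent_fops backs the "buffer_percent" file (default 50, set in
 * init_tracer_tracefs()). A rough sketch of the semantics: the value acts as
 * a watermark, and readers that block waiting on the ring buffer (such as
 * trace_pipe_raw) are only woken once the buffer is at least this full.
 * Writes above 100 are rejected with -EINVAL and 0 is bumped to 1 above.
 */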
9071 
9072 static struct dentry *trace_instance_dir;
9073 
9074 static void
9075 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9076 
9077 static int
9078 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9079 {
9080     enum ring_buffer_flags rb_flags;
9081 
9082     rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9083 
9084     buf->tr = tr;
9085 
9086     buf->buffer = ring_buffer_alloc(size, rb_flags);
9087     if (!buf->buffer)
9088         return -ENOMEM;
9089 
9090     buf->data = alloc_percpu(struct trace_array_cpu);
9091     if (!buf->data) {
9092         ring_buffer_free(buf->buffer);
9093         buf->buffer = NULL;
9094         return -ENOMEM;
9095     }
9096 
9097     /* Allocate the first page for all buffers */
9098     set_buffer_entries(&tr->array_buffer,
9099                ring_buffer_size(tr->array_buffer.buffer, 0));
9100 
9101     return 0;
9102 }
9103 
9104 static void free_trace_buffer(struct array_buffer *buf)
9105 {
9106     if (buf->buffer) {
9107         ring_buffer_free(buf->buffer);
9108         buf->buffer = NULL;
9109         free_percpu(buf->data);
9110         buf->data = NULL;
9111     }
9112 }
9113 
9114 static int allocate_trace_buffers(struct trace_array *tr, int size)
9115 {
9116     int ret;
9117 
9118     ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9119     if (ret)
9120         return ret;
9121 
9122 #ifdef CONFIG_TRACER_MAX_TRACE
9123     ret = allocate_trace_buffer(tr, &tr->max_buffer,
9124                     allocate_snapshot ? size : 1);
9125     if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9126         free_trace_buffer(&tr->array_buffer);
9127         return -ENOMEM;
9128     }
9129     tr->allocated_snapshot = allocate_snapshot;
9130 
9131     /*
9132      * Only the top level trace array gets its snapshot allocated
9133      * from the kernel command line.
9134      */
9135     allocate_snapshot = false;
9136 #endif
9137 
9138     return 0;
9139 }
9140 
9141 static void free_trace_buffers(struct trace_array *tr)
9142 {
9143     if (!tr)
9144         return;
9145 
9146     free_trace_buffer(&tr->array_buffer);
9147 
9148 #ifdef CONFIG_TRACER_MAX_TRACE
9149     free_trace_buffer(&tr->max_buffer);
9150 #endif
9151 }
9152 
9153 static void init_trace_flags_index(struct trace_array *tr)
9154 {
9155     int i;
9156 
9157     /* Used by the trace options files */
9158     for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9159         tr->trace_flags_index[i] = i;
9160 }
9161 
9162 static void __update_tracer_options(struct trace_array *tr)
9163 {
9164     struct tracer *t;
9165 
9166     for (t = trace_types; t; t = t->next)
9167         add_tracer_options(tr, t);
9168 }
9169 
9170 static void update_tracer_options(struct trace_array *tr)
9171 {
9172     mutex_lock(&trace_types_lock);
9173     tracer_options_updated = true;
9174     __update_tracer_options(tr);
9175     mutex_unlock(&trace_types_lock);
9176 }
9177 
9178 /* Must have trace_types_lock held */
9179 struct trace_array *trace_array_find(const char *instance)
9180 {
9181     struct trace_array *tr, *found = NULL;
9182 
9183     list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9184         if (tr->name && strcmp(tr->name, instance) == 0) {
9185             found = tr;
9186             break;
9187         }
9188     }
9189 
9190     return found;
9191 }
9192 
9193 struct trace_array *trace_array_find_get(const char *instance)
9194 {
9195     struct trace_array *tr;
9196 
9197     mutex_lock(&trace_types_lock);
9198     tr = trace_array_find(instance);
9199     if (tr)
9200         tr->ref++;
9201     mutex_unlock(&trace_types_lock);
9202 
9203     return tr;
9204 }
9205 
9206 static int trace_array_create_dir(struct trace_array *tr)
9207 {
9208     int ret;
9209 
9210     tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9211     if (!tr->dir)
9212         return -EINVAL;
9213 
9214     ret = event_trace_add_tracer(tr->dir, tr);
9215     if (ret) {
9216         tracefs_remove(tr->dir);
9217         return ret;
9218     }
9219 
9220     init_tracer_tracefs(tr, tr->dir);
9221     __update_tracer_options(tr);
9222 
9223     return ret;
9224 }
9225 
9226 static struct trace_array *trace_array_create(const char *name)
9227 {
9228     struct trace_array *tr;
9229     int ret;
9230 
9231     ret = -ENOMEM;
9232     tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9233     if (!tr)
9234         return ERR_PTR(ret);
9235 
9236     tr->name = kstrdup(name, GFP_KERNEL);
9237     if (!tr->name)
9238         goto out_free_tr;
9239 
9240     if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9241         goto out_free_tr;
9242 
9243     tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9244 
9245     cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9246 
9247     raw_spin_lock_init(&tr->start_lock);
9248 
9249     tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9250 
9251     tr->current_trace = &nop_trace;
9252 
9253     INIT_LIST_HEAD(&tr->systems);
9254     INIT_LIST_HEAD(&tr->events);
9255     INIT_LIST_HEAD(&tr->hist_vars);
9256     INIT_LIST_HEAD(&tr->err_log);
9257 
9258     if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9259         goto out_free_tr;
9260 
9261     if (ftrace_allocate_ftrace_ops(tr) < 0)
9262         goto out_free_tr;
9263 
9264     ftrace_init_trace_array(tr);
9265 
9266     init_trace_flags_index(tr);
9267 
9268     if (trace_instance_dir) {
9269         ret = trace_array_create_dir(tr);
9270         if (ret)
9271             goto out_free_tr;
9272     } else
9273         __trace_early_add_events(tr);
9274 
9275     list_add(&tr->list, &ftrace_trace_arrays);
9276 
9277     tr->ref++;
9278 
9279     return tr;
9280 
9281  out_free_tr:
9282     ftrace_free_ftrace_ops(tr);
9283     free_trace_buffers(tr);
9284     free_cpumask_var(tr->tracing_cpumask);
9285     kfree(tr->name);
9286     kfree(tr);
9287 
9288     return ERR_PTR(ret);
9289 }
9290 
9291 static int instance_mkdir(const char *name)
9292 {
9293     struct trace_array *tr;
9294     int ret;
9295 
9296     mutex_lock(&event_mutex);
9297     mutex_lock(&trace_types_lock);
9298 
9299     ret = -EEXIST;
9300     if (trace_array_find(name))
9301         goto out_unlock;
9302 
9303     tr = trace_array_create(name);
9304 
9305     ret = PTR_ERR_OR_ZERO(tr);
9306 
9307 out_unlock:
9308     mutex_unlock(&trace_types_lock);
9309     mutex_unlock(&event_mutex);
9310     return ret;
9311 }
9312 
9313 /**
9314  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9315  * @name: The name of the trace array to be looked up/created.
9316  *
9317  * Returns a pointer to the trace array with the given name, creating it
9318  * if it does not yet exist, or NULL if it cannot be created.
9319  *
9320  * NOTE: This function increments the reference counter associated with the
9321  * trace array returned. This makes sure it cannot be freed while in use.
9322  * Use trace_array_put() once the trace array is no longer needed.
9323  * If the trace_array is to be freed, trace_array_destroy() needs to
9324  * be called after the trace_array_put(), or simply let user space delete
9325  * it from the tracefs instances directory. But until the
9326  * trace_array_put() is called, user space can not delete it.
9327  *
9328  */
9329 struct trace_array *trace_array_get_by_name(const char *name)
9330 {
9331     struct trace_array *tr;
9332 
9333     mutex_lock(&event_mutex);
9334     mutex_lock(&trace_types_lock);
9335 
9336     list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9337         if (tr->name && strcmp(tr->name, name) == 0)
9338             goto out_unlock;
9339     }
9340 
9341     tr = trace_array_create(name);
9342 
9343     if (IS_ERR(tr))
9344         tr = NULL;
9345 out_unlock:
9346     if (tr)
9347         tr->ref++;
9348 
9349     mutex_unlock(&trace_types_lock);
9350     mutex_unlock(&event_mutex);
9351     return tr;
9352 }
9353 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
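
/*
 * A minimal sketch of the in-kernel usage described in the kernel-doc above
 * (the instance name and the calling context are hypothetical):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *	...use the instance (enable events, write into it)...
 *	trace_array_put(tr);        drop the reference taken above
 *	trace_array_destroy(tr);    only if the instance should be removed too
 */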
9354 
9355 static int __remove_instance(struct trace_array *tr)
9356 {
9357     int i;
9358 
9359     /* Reference counter for a newly created trace array = 1. */
9360     if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9361         return -EBUSY;
9362 
9363     list_del(&tr->list);
9364 
9365     /* Disable all the flags that were enabled coming in */
9366     for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9367         if ((1 << i) & ZEROED_TRACE_FLAGS)
9368             set_tracer_flag(tr, 1 << i, 0);
9369     }
9370 
9371     tracing_set_nop(tr);
9372     clear_ftrace_function_probes(tr);
9373     event_trace_del_tracer(tr);
9374     ftrace_clear_pids(tr);
9375     ftrace_destroy_function_files(tr);
9376     tracefs_remove(tr->dir);
9377     free_percpu(tr->last_func_repeats);
9378     free_trace_buffers(tr);
9379 
9380     for (i = 0; i < tr->nr_topts; i++) {
9381         kfree(tr->topts[i].topts);
9382     }
9383     kfree(tr->topts);
9384 
9385     free_cpumask_var(tr->tracing_cpumask);
9386     kfree(tr->name);
9387     kfree(tr);
9388 
9389     return 0;
9390 }
9391 
9392 int trace_array_destroy(struct trace_array *this_tr)
9393 {
9394     struct trace_array *tr;
9395     int ret;
9396 
9397     if (!this_tr)
9398         return -EINVAL;
9399 
9400     mutex_lock(&event_mutex);
9401     mutex_lock(&trace_types_lock);
9402 
9403     ret = -ENODEV;
9404 
9405     /* Make sure the trace array exists before destroying it. */
9406     list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9407         if (tr == this_tr) {
9408             ret = __remove_instance(tr);
9409             break;
9410         }
9411     }
9412 
9413     mutex_unlock(&trace_types_lock);
9414     mutex_unlock(&event_mutex);
9415 
9416     return ret;
9417 }
9418 EXPORT_SYMBOL_GPL(trace_array_destroy);
9419 
9420 static int instance_rmdir(const char *name)
9421 {
9422     struct trace_array *tr;
9423     int ret;
9424 
9425     mutex_lock(&event_mutex);
9426     mutex_lock(&trace_types_lock);
9427 
9428     ret = -ENODEV;
9429     tr = trace_array_find(name);
9430     if (tr)
9431         ret = __remove_instance(tr);
9432 
9433     mutex_unlock(&trace_types_lock);
9434     mutex_unlock(&event_mutex);
9435 
9436     return ret;
9437 }
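
/*
 * instance_mkdir()/instance_rmdir() are wired up as the tracefs "instances"
 * callbacks in create_trace_instances() below, so from user space (path
 * assumes tracefs mounted at /sys/kernel/tracing):
 *
 *   mkdir /sys/kernel/tracing/instances/foo    ->  instance_mkdir("foo")
 *   rmdir /sys/kernel/tracing/instances/foo    ->  instance_rmdir("foo")
 */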
9438 
9439 static __init void create_trace_instances(struct dentry *d_tracer)
9440 {
9441     struct trace_array *tr;
9442 
9443     trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9444                              instance_mkdir,
9445                              instance_rmdir);
9446     if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9447         return;
9448 
9449     mutex_lock(&event_mutex);
9450     mutex_lock(&trace_types_lock);
9451 
9452     list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9453         if (!tr->name)
9454             continue;
9455         if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9456                  "Failed to create instance directory\n"))
9457             break;
9458     }
9459 
9460     mutex_unlock(&trace_types_lock);
9461     mutex_unlock(&event_mutex);
9462 }
9463 
9464 static void
9465 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9466 {
9467     struct trace_event_file *file;
9468     int cpu;
9469 
9470     trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9471             tr, &show_traces_fops);
9472 
9473     trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9474             tr, &set_tracer_fops);
9475 
9476     trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9477               tr, &tracing_cpumask_fops);
9478 
9479     trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9480               tr, &tracing_iter_fops);
9481 
9482     trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9483               tr, &tracing_fops);
9484 
9485     trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9486               tr, &tracing_pipe_fops);
9487 
9488     trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9489               tr, &tracing_entries_fops);
9490 
9491     trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9492               tr, &tracing_total_entries_fops);
9493 
9494     trace_create_file("free_buffer", 0200, d_tracer,
9495               tr, &tracing_free_buffer_fops);
9496 
9497     trace_create_file("trace_marker", 0220, d_tracer,
9498               tr, &tracing_mark_fops);
9499 
9500     file = __find_event_file(tr, "ftrace", "print");
9501     if (file && file->dir)
9502         trace_create_file("trigger", TRACE_MODE_WRITE, file->dir,
9503                   file, &event_trigger_fops);
9504     tr->trace_marker_file = file;
9505 
9506     trace_create_file("trace_marker_raw", 0220, d_tracer,
9507               tr, &tracing_mark_raw_fops);
9508 
9509     trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9510               &trace_clock_fops);
9511 
9512     trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9513               tr, &rb_simple_fops);
9514 
9515     trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9516               &trace_time_stamp_mode_fops);
9517 
9518     tr->buffer_percent = 50;
9519 
9520     trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer,
9521             tr, &buffer_percent_fops);
9522 
9523     create_trace_options_dir(tr);
9524 
9525     trace_create_maxlat_file(tr, d_tracer);
9526 
9527     if (ftrace_create_function_files(tr, d_tracer))
9528         MEM_FAIL(1, "Could not allocate function filter files");
9529 
9530 #ifdef CONFIG_TRACER_SNAPSHOT
9531     trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9532               tr, &snapshot_fops);
9533 #endif
9534 
9535     trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9536               tr, &tracing_err_log_fops);
9537 
9538     for_each_tracing_cpu(cpu)
9539         tracing_init_tracefs_percpu(tr, cpu);
9540 
9541     ftrace_init_tracefs(tr, d_tracer);
9542 }
9543 
9544 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9545 {
9546     struct vfsmount *mnt;
9547     struct file_system_type *type;
9548 
9549     /*
9550      * To maintain backward compatibility for tools that mount
9551      * debugfs to get to the tracing facility, tracefs is automatically
9552      * mounted to the debugfs/tracing directory.
9553      */
9554     type = get_fs_type("tracefs");
9555     if (!type)
9556         return NULL;
9557     mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9558     put_filesystem(type);
9559     if (IS_ERR(mnt))
9560         return NULL;
9561     mntget(mnt);
9562 
9563     return mnt;
9564 }
9565 
9566 /**
9567  * tracing_init_dentry - initialize top level trace array
9568  *
9569  * This is called when creating files or directories in the tracing
9570  * directory. It is called via fs_initcall() by any of the boot up code
9571  * and returns 0 on success or a negative error code on failure.
9572  */
9573 int tracing_init_dentry(void)
9574 {
9575     struct trace_array *tr = &global_trace;
9576 
9577     if (security_locked_down(LOCKDOWN_TRACEFS)) {
9578         pr_warn("Tracing disabled due to lockdown\n");
9579         return -EPERM;
9580     }
9581 
9582     /* The top level trace array uses NULL as parent */
9583     if (tr->dir)
9584         return 0;
9585 
9586     if (WARN_ON(!tracefs_initialized()))
9587         return -ENODEV;
9588 
9589     /*
9590      * As there may still be users that expect the tracing
9591      * files to exist in debugfs/tracing, we must automount
9592      * the tracefs file system there, so older tools still
9593      * work with the newer kernel.
9594      */
9595     tr->dir = debugfs_create_automount("tracing", NULL,
9596                        trace_automount, NULL);
9597 
9598     return 0;
9599 }
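
/*
 * Net effect, assuming the usual mount points: tracefs is typically mounted
 * at /sys/kernel/tracing, and the first access to /sys/kernel/debug/tracing
 * trips the automount set up above via trace_automount(), so legacy tools
 * that only know about the debugfs path keep working.
 */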
9600 
9601 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9602 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9603 
9604 static struct workqueue_struct *eval_map_wq __initdata;
9605 static struct work_struct eval_map_work __initdata;
9606 static struct work_struct tracerfs_init_work __initdata;
9607 
9608 static void __init eval_map_work_func(struct work_struct *work)
9609 {
9610     int len;
9611 
9612     len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9613     trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9614 }
9615 
9616 static int __init trace_eval_init(void)
9617 {
9618     INIT_WORK(&eval_map_work, eval_map_work_func);
9619 
9620     eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9621     if (!eval_map_wq) {
9622         pr_err("Unable to allocate eval_map_wq\n");
9623         /* Do work here */
9624         eval_map_work_func(&eval_map_work);
9625         return -ENOMEM;
9626     }
9627 
9628     queue_work(eval_map_wq, &eval_map_work);
9629     return 0;
9630 }
9631 
9632 subsys_initcall(trace_eval_init);
9633 
9634 static int __init trace_eval_sync(void)
9635 {
9636     /* Make sure the eval map updates are finished */
9637     if (eval_map_wq)
9638         destroy_workqueue(eval_map_wq);
9639     return 0;
9640 }
9641 
9642 late_initcall_sync(trace_eval_sync);
9643 
9644 
9645 #ifdef CONFIG_MODULES
9646 static void trace_module_add_evals(struct module *mod)
9647 {
9648     if (!mod->num_trace_evals)
9649         return;
9650 
9651     /*
9652      * Modules with bad taint do not have events created; do not
9653      * bother with their eval maps (enums) either.
9654      */
9655     if (trace_module_has_bad_taint(mod))
9656         return;
9657 
9658     trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9659 }
9660 
9661 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9662 static void trace_module_remove_evals(struct module *mod)
9663 {
9664     union trace_eval_map_item *map;
9665     union trace_eval_map_item **last = &trace_eval_maps;
9666 
9667     if (!mod->num_trace_evals)
9668         return;
9669 
9670     mutex_lock(&trace_eval_mutex);
9671 
9672     map = trace_eval_maps;
9673 
9674     while (map) {
9675         if (map->head.mod == mod)
9676             break;
9677         map = trace_eval_jmp_to_tail(map);
9678         last = &map->tail.next;
9679         map = map->tail.next;
9680     }
9681     if (!map)
9682         goto out;
9683 
9684     *last = trace_eval_jmp_to_tail(map)->tail.next;
9685     kfree(map);
9686  out:
9687     mutex_unlock(&trace_eval_mutex);
9688 }
9689 #else
9690 static inline void trace_module_remove_evals(struct module *mod) { }
9691 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9692 
9693 static int trace_module_notify(struct notifier_block *self,
9694                    unsigned long val, void *data)
9695 {
9696     struct module *mod = data;
9697 
9698     switch (val) {
9699     case MODULE_STATE_COMING:
9700         trace_module_add_evals(mod);
9701         break;
9702     case MODULE_STATE_GOING:
9703         trace_module_remove_evals(mod);
9704         break;
9705     }
9706 
9707     return NOTIFY_OK;
9708 }
9709 
9710 static struct notifier_block trace_module_nb = {
9711     .notifier_call = trace_module_notify,
9712     .priority = 0,
9713 };
9714 #endif /* CONFIG_MODULES */
9715 
9716 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9717 {
9718 
9719     event_trace_init();
9720 
9721     init_tracer_tracefs(&global_trace, NULL);
9722     ftrace_init_tracefs_toplevel(&global_trace, NULL);
9723 
9724     trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9725             &global_trace, &tracing_thresh_fops);
9726 
9727     trace_create_file("README", TRACE_MODE_READ, NULL,
9728             NULL, &tracing_readme_fops);
9729 
9730     trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9731             NULL, &tracing_saved_cmdlines_fops);
9732 
9733     trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9734               NULL, &tracing_saved_cmdlines_size_fops);
9735 
9736     trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9737             NULL, &tracing_saved_tgids_fops);
9738 
9739     trace_create_eval_file(NULL);
9740 
9741 #ifdef CONFIG_MODULES
9742     register_module_notifier(&trace_module_nb);
9743 #endif
9744 
9745 #ifdef CONFIG_DYNAMIC_FTRACE
9746     trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9747             NULL, &tracing_dyn_info_fops);
9748 #endif
9749 
9750     create_trace_instances(NULL);
9751 
9752     update_tracer_options(&global_trace);
9753 }
9754 
9755 static __init int tracer_init_tracefs(void)
9756 {
9757     int ret;
9758 
9759     trace_access_lock_init();
9760 
9761     ret = tracing_init_dentry();
9762     if (ret)
9763         return 0;
9764 
9765     if (eval_map_wq) {
9766         INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9767         queue_work(eval_map_wq, &tracerfs_init_work);
9768     } else {
9769         tracer_init_tracefs_work_func(NULL);
9770     }
9771 
9772     rv_init_interface();
9773 
9774     return 0;
9775 }
9776 
9777 fs_initcall(tracer_init_tracefs);
9778 
9779 static int trace_panic_handler(struct notifier_block *this,
9780                    unsigned long event, void *unused)
9781 {
9782     if (ftrace_dump_on_oops)
9783         ftrace_dump(ftrace_dump_on_oops);
9784     return NOTIFY_OK;
9785 }
9786 
9787 static struct notifier_block trace_panic_notifier = {
9788     .notifier_call  = trace_panic_handler,
9789     .next           = NULL,
9790     .priority       = 150   /* priority: INT_MAX >= x >= 0 */
9791 };
9792 
9793 static int trace_die_handler(struct notifier_block *self,
9794                  unsigned long val,
9795                  void *data)
9796 {
9797     switch (val) {
9798     case DIE_OOPS:
9799         if (ftrace_dump_on_oops)
9800             ftrace_dump(ftrace_dump_on_oops);
9801         break;
9802     default:
9803         break;
9804     }
9805     return NOTIFY_OK;
9806 }
9807 
9808 static struct notifier_block trace_die_notifier = {
9809     .notifier_call = trace_die_handler,
9810     .priority = 200
9811 };
9812 
9813 /*
9814  * printk is limited to a max of 1024 characters; we don't need it that big.
9815  * Nothing should be printing 1000 characters anyway.
9816  */
9817 #define TRACE_MAX_PRINT     1000
9818 
9819 /*
9820  * Define here KERN_TRACE so that we have one place to modify
9821  * it if we decide to change what log level the ftrace dump
9822  * should be at.
9823  */
9824 #define KERN_TRACE      KERN_EMERG
9825 
9826 void
9827 trace_printk_seq(struct trace_seq *s)
9828 {
9829     /* Probably should print a warning here. */
9830     if (s->seq.len >= TRACE_MAX_PRINT)
9831         s->seq.len = TRACE_MAX_PRINT;
9832 
9833     /*
9834      * More paranoid code. Although the buffer size is set to
9835      * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9836      * an extra layer of protection.
9837      */
9838     if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9839         s->seq.len = s->seq.size - 1;
9840 
9841     /* Should be zero-terminated already, but we are paranoid. */
9842     s->buffer[s->seq.len] = 0;
9843 
9844     printk(KERN_TRACE "%s", s->buffer);
9845 
9846     trace_seq_init(s);
9847 }
9848 
9849 void trace_init_global_iter(struct trace_iterator *iter)
9850 {
9851     iter->tr = &global_trace;
9852     iter->trace = iter->tr->current_trace;
9853     iter->cpu_file = RING_BUFFER_ALL_CPUS;
9854     iter->array_buffer = &global_trace.array_buffer;
9855 
9856     if (iter->trace && iter->trace->open)
9857         iter->trace->open(iter);
9858 
9859     /* Annotate start of buffers if we had overruns */
9860     if (ring_buffer_overruns(iter->array_buffer->buffer))
9861         iter->iter_flags |= TRACE_FILE_ANNOTATE;
9862 
9863     /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9864     if (trace_clocks[iter->tr->clock_id].in_ns)
9865         iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9866 
9867     /* Cannot use kmalloc for iter.temp and iter.fmt */
9868     iter->temp = static_temp_buf;
9869     iter->temp_size = STATIC_TEMP_BUF_SIZE;
9870     iter->fmt = static_fmt_buf;
9871     iter->fmt_size = STATIC_FMT_BUF_SIZE;
9872 }
9873 
9874 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9875 {
9876     /* use static because iter can be a bit big for the stack */
9877     static struct trace_iterator iter;
9878     static atomic_t dump_running;
9879     struct trace_array *tr = &global_trace;
9880     unsigned int old_userobj;
9881     unsigned long flags;
9882     int cnt = 0, cpu;
9883 
9884     /* Only allow one dump user at a time. */
9885     if (atomic_inc_return(&dump_running) != 1) {
9886         atomic_dec(&dump_running);
9887         return;
9888     }
9889 
9890     /*
9891      * Always turn off tracing when we dump.
9892      * We don't need to show trace output of what happens
9893      * between multiple crashes.
9894      *
9895      * If the user does a sysrq-z, then they can re-enable
9896      * tracing with echo 1 > tracing_on.
9897      */
9898     tracing_off();
9899 
9900     local_irq_save(flags);
9901 
9902     /* Simulate the iterator */
9903     trace_init_global_iter(&iter);
9904 
9905     for_each_tracing_cpu(cpu) {
9906         atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9907     }
9908 
9909     old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9910 
9911     /* don't look at user memory in panic mode */
9912     tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9913 
9914     switch (oops_dump_mode) {
9915     case DUMP_ALL:
9916         iter.cpu_file = RING_BUFFER_ALL_CPUS;
9917         break;
9918     case DUMP_ORIG:
9919         iter.cpu_file = raw_smp_processor_id();
9920         break;
9921     case DUMP_NONE:
9922         goto out_enable;
9923     default:
9924         printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9925         iter.cpu_file = RING_BUFFER_ALL_CPUS;
9926     }
9927 
9928     printk(KERN_TRACE "Dumping ftrace buffer:\n");
9929 
9930     /* Did function tracer already get disabled? */
9931     if (ftrace_is_dead()) {
9932         printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9933         printk("#          MAY BE MISSING FUNCTION EVENTS\n");
9934     }
9935 
9936     /*
9937      * We need to stop all tracing on all CPUs to read
9938      * the next buffer. This is a bit expensive, but is
9939      * not done often. We print all that we can read,
9940      * and then release the locks again.
9941      */
9942 
9943     while (!trace_empty(&iter)) {
9944 
9945         if (!cnt)
9946             printk(KERN_TRACE "---------------------------------\n");
9947 
9948         cnt++;
9949 
9950         trace_iterator_reset(&iter);
9951         iter.iter_flags |= TRACE_FILE_LAT_FMT;
9952 
9953         if (trace_find_next_entry_inc(&iter) != NULL) {
9954             int ret;
9955 
9956             ret = print_trace_line(&iter);
9957             if (ret != TRACE_TYPE_NO_CONSUME)
9958                 trace_consume(&iter);
9959         }
9960         touch_nmi_watchdog();
9961 
9962         trace_printk_seq(&iter.seq);
9963     }
9964 
9965     if (!cnt)
9966         printk(KERN_TRACE "   (ftrace buffer empty)\n");
9967     else
9968         printk(KERN_TRACE "---------------------------------\n");
9969 
9970  out_enable:
9971     tr->trace_flags |= old_userobj;
9972 
9973     for_each_tracing_cpu(cpu) {
9974         atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9975     }
9976     atomic_dec(&dump_running);
9977     local_irq_restore(flags);
9978 }
9979 EXPORT_SYMBOL_GPL(ftrace_dump);
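
/*
 * Typical ways this dump path is reached (not an exhaustive list): the
 * "ftrace_dump_on_oops" kernel command line option via the panic/die
 * notifiers registered in tracer_alloc_buffers(), or the magic SysRq 'z'
 * key, which also ends up calling ftrace_dump().
 */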
9980 
9981 #define WRITE_BUFSIZE  4096
9982 
9983 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9984                 size_t count, loff_t *ppos,
9985                 int (*createfn)(const char *))
9986 {
9987     char *kbuf, *buf, *tmp;
9988     int ret = 0;
9989     size_t done = 0;
9990     size_t size;
9991 
9992     kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9993     if (!kbuf)
9994         return -ENOMEM;
9995 
9996     while (done < count) {
9997         size = count - done;
9998 
9999         if (size >= WRITE_BUFSIZE)
10000             size = WRITE_BUFSIZE - 1;
10001 
10002         if (copy_from_user(kbuf, buffer + done, size)) {
10003             ret = -EFAULT;
10004             goto out;
10005         }
10006         kbuf[size] = '\0';
10007         buf = kbuf;
10008         do {
10009             tmp = strchr(buf, '\n');
10010             if (tmp) {
10011                 *tmp = '\0';
10012                 size = tmp - buf + 1;
10013             } else {
10014                 size = strlen(buf);
10015                 if (done + size < count) {
10016                     if (buf != kbuf)
10017                         break;
10018                     /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10019                     pr_warn("Line length is too long: Should be less than %d\n",
10020                         WRITE_BUFSIZE - 2);
10021                     ret = -EINVAL;
10022                     goto out;
10023                 }
10024             }
10025             done += size;
10026 
10027             /* Remove comments */
10028             tmp = strchr(buf, '#');
10029 
10030             if (tmp)
10031                 *tmp = '\0';
10032 
10033             ret = createfn(buf);
10034             if (ret)
10035                 goto out;
10036             buf += size;
10037 
10038         } while (done < count);
10039     }
10040     ret = done;
10041 
10042 out:
10043     kfree(kbuf);
10044 
10045     return ret;
10046 }
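
/*
 * Sketch of the input this parser accepts (it is used by dynamic-event
 * style files): commands are newline separated, anything after a '#' on a
 * line is ignored, and each remaining line is handed to @createfn, e.g.:
 *
 *   # this is a comment
 *   p:myprobe do_sys_open
 *
 * A single line longer than WRITE_BUFSIZE - 2 is rejected with -EINVAL.
 */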
10047 
10048 __init static int tracer_alloc_buffers(void)
10049 {
10050     int ring_buf_size;
10051     int ret = -ENOMEM;
10052 
10053 
10054     if (security_locked_down(LOCKDOWN_TRACEFS)) {
10055         pr_warn("Tracing disabled due to lockdown\n");
10056         return -EPERM;
10057     }
10058 
10059     /*
10060      * Make sure we don't accidentally add more trace options
10061      * than we have bits for.
10062      */
10063     BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10064 
10065     if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10066         goto out;
10067 
10068     if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10069         goto out_free_buffer_mask;
10070 
10071     /* Only allocate trace_printk buffers if a trace_printk exists */
10072     if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10073         /* Must be called before global_trace.buffer is allocated */
10074         trace_printk_init_buffers();
10075 
10076     /* To save memory, keep the ring buffer size to its minimum */
10077     if (ring_buffer_expanded)
10078         ring_buf_size = trace_buf_size;
10079     else
10080         ring_buf_size = 1;
10081 
10082     cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10083     cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10084 
10085     raw_spin_lock_init(&global_trace.start_lock);
10086 
10087     /*
10088      * The prepare callback allocates some memory for the ring buffer. We
10089      * don't free the buffer if the CPU goes down. If we were to free
10090      * the buffer, then the user would lose any trace that was in the
10091      * buffer. The memory will be removed once the "instance" is removed.
10092      */
10093     ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10094                       "trace/RB:prepare", trace_rb_cpu_prepare,
10095                       NULL);
10096     if (ret < 0)
10097         goto out_free_cpumask;
10098     /* Used for event triggers */
10099     ret = -ENOMEM;
10100     temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10101     if (!temp_buffer)
10102         goto out_rm_hp_state;
10103 
10104     if (trace_create_savedcmd() < 0)
10105         goto out_free_temp_buffer;
10106 
10107     /* TODO: make the number of buffers hot pluggable with CPUs */
10108     if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10109         MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10110         goto out_free_savedcmd;
10111     }
10112 
10113     if (global_trace.buffer_disabled)
10114         tracing_off();
10115 
10116     if (trace_boot_clock) {
10117         ret = tracing_set_clock(&global_trace, trace_boot_clock);
10118         if (ret < 0)
10119             pr_warn("Trace clock %s not defined, going back to default\n",
10120                 trace_boot_clock);
10121     }
10122 
10123     /*
10124      * register_tracer() might reference current_trace, so it
10125      * needs to be set before we register anything. This is
10126      * just a bootstrap of current_trace anyway.
10127      */
10128     global_trace.current_trace = &nop_trace;
10129 
10130     global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10131 
10132     ftrace_init_global_array_ops(&global_trace);
10133 
10134     init_trace_flags_index(&global_trace);
10135 
10136     register_tracer(&nop_trace);
10137 
10138     /* Function tracing may start here (via kernel command line) */
10139     init_function_trace();
10140 
10141     /* All seems OK, enable tracing */
10142     tracing_disabled = 0;
10143 
10144     atomic_notifier_chain_register(&panic_notifier_list,
10145                        &trace_panic_notifier);
10146 
10147     register_die_notifier(&trace_die_notifier);
10148 
10149     global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10150 
10151     INIT_LIST_HEAD(&global_trace.systems);
10152     INIT_LIST_HEAD(&global_trace.events);
10153     INIT_LIST_HEAD(&global_trace.hist_vars);
10154     INIT_LIST_HEAD(&global_trace.err_log);
10155     list_add(&global_trace.list, &ftrace_trace_arrays);
10156 
10157     apply_trace_boot_options();
10158 
10159     register_snapshot_cmd();
10160 
10161     test_can_verify();
10162 
10163     return 0;
10164 
10165 out_free_savedcmd:
10166     free_saved_cmdlines_buffer(savedcmd);
10167 out_free_temp_buffer:
10168     ring_buffer_free(temp_buffer);
10169 out_rm_hp_state:
10170     cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10171 out_free_cpumask:
10172     free_cpumask_var(global_trace.tracing_cpumask);
10173 out_free_buffer_mask:
10174     free_cpumask_var(tracing_buffer_mask);
10175 out:
10176     return ret;
10177 }
10178 
10179 void __init ftrace_boot_snapshot(void)
10180 {
10181     if (snapshot_at_boot) {
10182         tracing_snapshot();
10183         internal_trace_puts("** Boot snapshot taken **\n");
10184     }
10185 }
10186 
10187 void __init early_trace_init(void)
10188 {
10189     if (tracepoint_printk) {
10190         tracepoint_print_iter =
10191             kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10192         if (MEM_FAIL(!tracepoint_print_iter,
10193                  "Failed to allocate trace iterator\n"))
10194             tracepoint_printk = 0;
10195         else
10196             static_key_enable(&tracepoint_printk_key.key);
10197     }
10198     tracer_alloc_buffers();
10199 }
10200 
10201 void __init trace_init(void)
10202 {
10203     trace_event_init();
10204 }
10205 
10206 __init static void clear_boot_tracer(void)
10207 {
10208     /*
10209      * The buffer holding the default bootup tracer name is in an init
10210      * section. This function is called at late init. If the boot tracer
10211      * was never registered, clear the pointer to prevent a later
10212      * registration from accessing the buffer that is about to be
10213      * freed.
10214      */
10215     if (!default_bootup_tracer)
10216         return;
10217 
10218     printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10219            default_bootup_tracer);
10220     default_bootup_tracer = NULL;
10221 }
10222 
10223 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10224 __init static void tracing_set_default_clock(void)
10225 {
10226     /* sched_clock_stable() is determined in late_initcall */
10227     if (!trace_boot_clock && !sched_clock_stable()) {
10228         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10229             pr_warn("Can not set tracing clock due to lockdown\n");
10230             return;
10231         }
10232 
10233         printk(KERN_WARNING
10234                "Unstable clock detected, switching default tracing clock to \"global\"\n"
10235                "If you want to keep using the local clock, then add:\n"
10236                "  \"trace_clock=local\"\n"
10237                "on the kernel command line\n");
10238         tracing_set_clock(&global_trace, "global");
10239     }
10240 }
10241 #else
10242 static inline void tracing_set_default_clock(void) { }
10243 #endif
10244 
10245 __init static int late_trace_init(void)
10246 {
10247     if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10248         static_key_disable(&tracepoint_printk_key.key);
10249         tracepoint_printk = 0;
10250     }
10251 
10252     tracing_set_default_clock();
10253     clear_boot_tracer();
10254     return 0;
10255 }
10256 
10257 late_initcall_sync(late_trace_init);