// SPDX-License-Identifier: GPL-2.0
/*
 * OS Noise Tracer: computes the OS Noise suffered by a running thread.
 * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread.
 *
 * Based on "hwlat_detector" tracer by:
 *   Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
 *   Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com>
 *   With feedback from Clark Williams <williams@redhat.com>
 *
 * And also based on the rtsl tracer presented on:
 *  DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux
 *  scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems
 *  (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020.
 *
 * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com>
 */

#include <linux/kthread.h>
#include <linux/tracefs.h>
#include <linux/uaccess.h>
#include <linux/cpumask.h>
#include <linux/delay.h>
#include <linux/sched/clock.h>
#include <uapi/linux/sched/types.h>
#include <linux/sched.h>
#include "trace.h"

#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/trace/irq_vectors.h>
#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_FILE
#endif /* CONFIG_X86_LOCAL_APIC */

#include <trace/events/irq.h>
#include <trace/events/sched.h>

#define CREATE_TRACE_POINTS
#include <trace/events/osnoise.h>

/*
 * Default values.
 */
#define BANNER          "osnoise: "
#define DEFAULT_SAMPLE_PERIOD   1000000         /* 1s */
#define DEFAULT_SAMPLE_RUNTIME  1000000         /* 1s */

#define DEFAULT_TIMERLAT_PERIOD 1000            /* 1ms */
#define DEFAULT_TIMERLAT_PRIO   95              /* FIFO 95 */

/*
 * trace_array of the enabled osnoise/timerlat instances.
 */
struct osnoise_instance {
    struct list_head    list;
    struct trace_array  *tr;
};

static struct list_head osnoise_instances;

static bool osnoise_has_registered_instances(void)
{
    return !!list_first_or_null_rcu(&osnoise_instances,
                    struct osnoise_instance,
                    list);
}

/*
 * osnoise_instance_registered - check if a tr is already registered
 */
static int osnoise_instance_registered(struct trace_array *tr)
{
    struct osnoise_instance *inst;
    int found = 0;

    rcu_read_lock();
    list_for_each_entry_rcu(inst, &osnoise_instances, list) {
        if (inst->tr == tr)
            found = 1;
    }
    rcu_read_unlock();

    return found;
}

/*
 * osnoise_register_instance - register a new trace instance
 *
 * Register a trace_array *tr in the list of instances running
 * osnoise/timerlat tracers.
 */
static int osnoise_register_instance(struct trace_array *tr)
{
    struct osnoise_instance *inst;

    /*
     * register/unregister serialization is provided by trace's
     * trace_types_lock.
     */
    lockdep_assert_held(&trace_types_lock);

    inst = kmalloc(sizeof(*inst), GFP_KERNEL);
    if (!inst)
        return -ENOMEM;

    INIT_LIST_HEAD_RCU(&inst->list);
    inst->tr = tr;
    list_add_tail_rcu(&inst->list, &osnoise_instances);

    return 0;
}

/*
 * osnoise_unregister_instance - unregister a registered trace instance
 *
 * Remove the trace_array *tr from the list of instances running
 * osnoise/timerlat tracers.
 */
static void osnoise_unregister_instance(struct trace_array *tr)
{
    struct osnoise_instance *inst;
    int found = 0;

    /*
     * register/unregister serialization is provided by trace's
     * trace_types_lock.
     */
    lockdep_assert_held(&trace_types_lock);

    list_for_each_entry_rcu(inst, &osnoise_instances, list) {
        if (inst->tr == tr) {
            list_del_rcu(&inst->list);
            found = 1;
            break;
        }
    }

    if (!found)
        return;

    kvfree_rcu(inst);
}

/*
 * NMI runtime info.
 */
struct osn_nmi {
    u64 count;
    u64 delta_start;
};

/*
 * IRQ runtime info.
 */
struct osn_irq {
    u64 count;
    u64 arrival_time;
    u64 delta_start;
};

#define IRQ_CONTEXT 0
#define THREAD_CONTEXT  1
/*
 * softirq runtime info.
 */
struct osn_softirq {
    u64 count;
    u64 arrival_time;
    u64 delta_start;
};

/*
 * thread runtime info.
 */
struct osn_thread {
    u64 count;
    u64 arrival_time;
    u64 delta_start;
};

/*
 * Runtime information: this structure saves the runtime information used by
 * one sampling thread.
 */
struct osnoise_variables {
    struct task_struct  *kthread;
    bool            sampling;
    pid_t           pid;
    struct osn_nmi      nmi;
    struct osn_irq      irq;
    struct osn_softirq  softirq;
    struct osn_thread   thread;
    local_t         int_counter;
};

/*
 * Per-cpu runtime information.
 */
DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var);

/*
 * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU
 */
static inline struct osnoise_variables *this_cpu_osn_var(void)
{
    return this_cpu_ptr(&per_cpu_osnoise_var);
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * Runtime information for the timer mode.
 */
struct timerlat_variables {
    struct task_struct  *kthread;
    struct hrtimer      timer;
    u64         rel_period;
    u64         abs_period;
    bool            tracing_thread;
    u64         count;
};

DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var);

/*
 * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU
 */
static inline struct timerlat_variables *this_cpu_tmr_var(void)
{
    return this_cpu_ptr(&per_cpu_timerlat_var);
}

/*
 * tlat_var_reset - Reset the values of the given timerlat_variables
 */
static inline void tlat_var_reset(void)
{
    struct timerlat_variables *tlat_var;
    int cpu;
    /*
     * So far, all the values are initialized as 0, so
     * zeroing the structure is perfect.
     */
    for_each_cpu(cpu, cpu_online_mask) {
        tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
        memset(tlat_var, 0, sizeof(*tlat_var));
    }
}
#else /* CONFIG_TIMERLAT_TRACER */
#define tlat_var_reset()    do {} while (0)
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * osn_var_reset - Reset the values of the given osnoise_variables
 */
static inline void osn_var_reset(void)
{
    struct osnoise_variables *osn_var;
    int cpu;

    /*
     * So far, all the values are initialized as 0, so
     * zeroing the structure is perfect.
     */
    for_each_cpu(cpu, cpu_online_mask) {
        osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
        memset(osn_var, 0, sizeof(*osn_var));
    }
}

/*
 * osn_var_reset_all - Reset the values of all per-cpu osnoise_variables
 */
static inline void osn_var_reset_all(void)
{
    osn_var_reset();
    tlat_var_reset();
}

/*
 * Tells NMIs to call back to the osnoise tracer to record timestamps.
 */
bool trace_osnoise_callback_enabled;

/*
 * osnoise sample structure definition. Used to store the statistics of a
 * sample run.
 */
struct osnoise_sample {
    u64         runtime;    /* runtime */
    u64         noise;      /* noise */
    u64         max_sample; /* max single noise sample */
    int         hw_count;   /* # HW (incl. hypervisor) interference */
    int         nmi_count;  /* # NMIs during this sample */
    int         irq_count;  /* # IRQs during this sample */
    int         softirq_count;  /* # softirqs during this sample */
    int         thread_count;   /* # threads during this sample */
};
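
/*
 * These fields map one-to-one onto the columns of the osnoise trace
 * output printed by print_osnoise_headers() below: RUNTIME IN US,
 * NOISE IN US, MAX SINGLE NOISE IN US, and the interference counters
 * (HW, NMI, IRQ, SIRQ, THREAD). The "% OF CPU AVAILABLE" column is not
 * stored here; it is derived from runtime and noise when the sample is
 * printed.
 */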

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * timerlat sample structure definition. Used to store the statistics of
 * a sample run.
 */
struct timerlat_sample {
    u64         timer_latency;  /* timer_latency */
    unsigned int        seqnum;     /* unique sequence */
    int         context;    /* timer context */
};
#endif

/*
 * Protect the interface.
 */
struct mutex interface_lock;

/*
 * Tracer data.
 */
static struct osnoise_data {
    u64 sample_period;      /* total sampling period */
    u64 sample_runtime;     /* active sampling portion of period */
    u64 stop_tracing;       /* stop trace in the internal operation (loop/irq) */
    u64 stop_tracing_total; /* stop trace in the final operation (report/thread) */
#ifdef CONFIG_TIMERLAT_TRACER
    u64 timerlat_period;    /* timerlat period */
    u64 print_stack;        /* print IRQ stack if total > */
    int timerlat_tracer;    /* timerlat tracer */
#endif
    bool    tainted;        /* inform users and developers about a problem */
} osnoise_data = {
    .sample_period          = DEFAULT_SAMPLE_PERIOD,
    .sample_runtime         = DEFAULT_SAMPLE_RUNTIME,
    .stop_tracing           = 0,
    .stop_tracing_total     = 0,
#ifdef CONFIG_TIMERLAT_TRACER
    .print_stack            = 0,
    .timerlat_period        = DEFAULT_TIMERLAT_PERIOD,
    .timerlat_tracer        = 0,
#endif
};
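
/*
 * These knobs are exposed as files under tracefs. An illustrative
 * session (a sketch, assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *   # cd /sys/kernel/tracing
 *   # echo osnoise > current_tracer
 *   # echo 1000000 > osnoise/period_us
 *   # echo 500000 > osnoise/runtime_us
 *   # echo 1 > tracing_on
 *   # cat trace
 *
 * period_us and runtime_us map to sample_period and sample_runtime;
 * stop_tracing_us and stop_tracing_total_us map to stop_tracing and
 * stop_tracing_total.
 */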

#ifdef CONFIG_TIMERLAT_TRACER
static inline bool timerlat_enabled(void)
{
    return osnoise_data.timerlat_tracer;
}

static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
{
    struct timerlat_variables *tlat_var = this_cpu_tmr_var();
    /*
     * If timerlat is enabled, but the irq handler did not
     * run yet to enable tracing_thread, do not trace.
     */
    if (!tlat_var->tracing_thread) {
        osn_var->softirq.arrival_time = 0;
        osn_var->softirq.delta_start = 0;
        return 0;
    }
    return 1;
}

static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
{
    struct timerlat_variables *tlat_var = this_cpu_tmr_var();
    /*
     * If timerlat is enabled, but the irq handler did not
     * run yet to enable tracing_thread, do not trace.
     */
    if (!tlat_var->tracing_thread) {
        osn_var->thread.delta_start = 0;
        osn_var->thread.arrival_time = 0;
        return 0;
    }
    return 1;
}
#else /* CONFIG_TIMERLAT_TRACER */
static inline bool timerlat_enabled(void)
{
    return false;
}

static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
{
    return 1;
}
static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
{
    return 1;
}
#endif

#ifdef CONFIG_PREEMPT_RT
/*
 * Print the osnoise header info.
 */
static void print_osnoise_headers(struct seq_file *s)
{
    if (osnoise_data.tainted)
        seq_puts(s, "# osnoise is tainted!\n");

    seq_puts(s, "#                                _-------=> irqs-off\n");
    seq_puts(s, "#                               / _------=> need-resched\n");
    seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
    seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
    seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
    seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
    seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");

    seq_puts(s, "#                              |||||| /          ");
    seq_puts(s, "                                     MAX\n");

    seq_puts(s, "#                              ||||| /                         ");
    seq_puts(s, "                    SINGLE      Interference counters:\n");

    seq_puts(s, "#                              |||||||               RUNTIME   ");
    seq_puts(s, "   NOISE  % OF CPU  NOISE    +-----------------------------+\n");

    seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    IN US    ");
    seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");

    seq_puts(s, "#              | |         |   |||||||      |           |      ");
    seq_puts(s, "       |    |            |      |      |      |      |      |\n");
}
#else /* CONFIG_PREEMPT_RT */
static void print_osnoise_headers(struct seq_file *s)
{
    if (osnoise_data.tainted)
        seq_puts(s, "# osnoise is tainted!\n");

    seq_puts(s, "#                                _-----=> irqs-off\n");
    seq_puts(s, "#                               / _----=> need-resched\n");
    seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
    seq_puts(s, "#                              || / _--=> preempt-depth\n");
    seq_puts(s, "#                              ||| / _-=> migrate-disable     ");
    seq_puts(s, "                    MAX\n");
    seq_puts(s, "#                              |||| /     delay               ");
    seq_puts(s, "                    SINGLE      Interference counters:\n");

    seq_puts(s, "#                              |||||               RUNTIME   ");
    seq_puts(s, "   NOISE  % OF CPU  NOISE    +-----------------------------+\n");

    seq_puts(s, "#           TASK-PID      CPU# |||||   TIMESTAMP    IN US    ");
    seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");

    seq_puts(s, "#              | |         |   |||||      |           |      ");
    seq_puts(s, "       |    |            |      |      |      |      |      |\n");
}
#endif /* CONFIG_PREEMPT_RT */

/*
 * osnoise_taint - report an osnoise error.
 */
#define osnoise_taint(msg) ({                           \
    struct osnoise_instance *inst;                      \
    struct trace_buffer *buffer;                        \
                                        \
    rcu_read_lock();                            \
    list_for_each_entry_rcu(inst, &osnoise_instances, list) {       \
        buffer = inst->tr->array_buffer.buffer;             \
        trace_array_printk_buf(buffer, _THIS_IP_, msg);         \
    }                                   \
    rcu_read_unlock();                          \
    osnoise_data.tainted = true;                        \
})

/*
 * Record an osnoise_sample into the tracer buffer.
 */
static void
__trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer)
{
    struct trace_event_call *call = &event_osnoise;
    struct ring_buffer_event *event;
    struct osnoise_entry *entry;

    event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry),
                      tracing_gen_ctx());
    if (!event)
        return;
    entry   = ring_buffer_event_data(event);
    entry->runtime      = sample->runtime;
    entry->noise        = sample->noise;
    entry->max_sample   = sample->max_sample;
    entry->hw_count     = sample->hw_count;
    entry->nmi_count    = sample->nmi_count;
    entry->irq_count    = sample->irq_count;
    entry->softirq_count    = sample->softirq_count;
    entry->thread_count = sample->thread_count;

    if (!call_filter_check_discard(call, entry, buffer, event))
        trace_buffer_unlock_commit_nostack(buffer, event);
}

/*
 * Record an osnoise_sample on all osnoise instances.
 */
static void trace_osnoise_sample(struct osnoise_sample *sample)
{
    struct osnoise_instance *inst;
    struct trace_buffer *buffer;

    rcu_read_lock();
    list_for_each_entry_rcu(inst, &osnoise_instances, list) {
        buffer = inst->tr->array_buffer.buffer;
        __trace_osnoise_sample(sample, buffer);
    }
    rcu_read_unlock();
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * Print the timerlat header info.
 */
#ifdef CONFIG_PREEMPT_RT
static void print_timerlat_headers(struct seq_file *s)
{
    seq_puts(s, "#                                _-------=> irqs-off\n");
    seq_puts(s, "#                               / _------=> need-resched\n");
    seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
    seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
    seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
    seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
    seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");
    seq_puts(s, "#                              |||||| /\n");
    seq_puts(s, "#                              |||||||             ACTIVATION\n");
    seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    ID     ");
    seq_puts(s, "       CONTEXT                LATENCY\n");
    seq_puts(s, "#              | |         |   |||||||      |         |      ");
    seq_puts(s, "            |                       |\n");
}
#else /* CONFIG_PREEMPT_RT */
static void print_timerlat_headers(struct seq_file *s)
{
    seq_puts(s, "#                                _-----=> irqs-off\n");
    seq_puts(s, "#                               / _----=> need-resched\n");
    seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
    seq_puts(s, "#                              || / _--=> preempt-depth\n");
    seq_puts(s, "#                              ||| / _-=> migrate-disable\n");
    seq_puts(s, "#                              |||| /     delay\n");
    seq_puts(s, "#                              |||||            ACTIVATION\n");
    seq_puts(s, "#           TASK-PID      CPU# |||||   TIMESTAMP   ID      ");
    seq_puts(s, "      CONTEXT                 LATENCY\n");
    seq_puts(s, "#              | |         |   |||||      |         |      ");
    seq_puts(s, "            |                       |\n");
}
#endif /* CONFIG_PREEMPT_RT */

static void
__trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer)
{
    struct trace_event_call *call = &event_osnoise;
    struct ring_buffer_event *event;
    struct timerlat_entry *entry;

    event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry),
                      tracing_gen_ctx());
    if (!event)
        return;
    entry   = ring_buffer_event_data(event);
    entry->seqnum           = sample->seqnum;
    entry->context          = sample->context;
    entry->timer_latency        = sample->timer_latency;

    if (!call_filter_check_discard(call, entry, buffer, event))
        trace_buffer_unlock_commit_nostack(buffer, event);
}

/*
 * Record a timerlat_sample on all osnoise instances.
 */
static void trace_timerlat_sample(struct timerlat_sample *sample)
{
    struct osnoise_instance *inst;
    struct trace_buffer *buffer;

    rcu_read_lock();
    list_for_each_entry_rcu(inst, &osnoise_instances, list) {
        buffer = inst->tr->array_buffer.buffer;
        __trace_timerlat_sample(sample, buffer);
    }
    rcu_read_unlock();
}

#ifdef CONFIG_STACKTRACE

#define MAX_CALLS   256

/*
 * Stack trace will take place only at IRQ level, so, no need
 * to control nesting here.
 */
struct trace_stack {
    int     stack_size;
    int     nr_entries;
    unsigned long   calls[MAX_CALLS];
};

static DEFINE_PER_CPU(struct trace_stack, trace_stack);

/*
 * timerlat_save_stack - save a stack trace without printing
 *
 * Save the current stack trace without printing. The
 * stack will be printed later, after the end of the measurement.
 */
static void timerlat_save_stack(int skip)
{
    unsigned int size, nr_entries;
    struct trace_stack *fstack;

    fstack = this_cpu_ptr(&trace_stack);

    size = ARRAY_SIZE(fstack->calls);

    nr_entries = stack_trace_save(fstack->calls, size, skip);

    fstack->stack_size = nr_entries * sizeof(unsigned long);
    fstack->nr_entries = nr_entries;
}

static void
__timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size)
{
    struct trace_event_call *call = &event_osnoise;
    struct ring_buffer_event *event;
    struct stack_entry *entry;

    event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size,
                      tracing_gen_ctx());
    if (!event)
        return;

    entry = ring_buffer_event_data(event);

    memcpy(&entry->caller, fstack->calls, size);
    entry->size = fstack->nr_entries;

    if (!call_filter_check_discard(call, entry, buffer, event))
        trace_buffer_unlock_commit_nostack(buffer, event);
}

/*
 * timerlat_dump_stack - dump a stack trace previously saved
 */
static void timerlat_dump_stack(u64 latency)
{
    struct osnoise_instance *inst;
    struct trace_buffer *buffer;
    struct trace_stack *fstack;
    unsigned int size;

    /*
     * trace only if latency > print_stack config, if enabled.
     */
    if (!osnoise_data.print_stack || osnoise_data.print_stack > latency)
        return;

    preempt_disable_notrace();
    fstack = this_cpu_ptr(&trace_stack);
    size = fstack->stack_size;

    rcu_read_lock();
    list_for_each_entry_rcu(inst, &osnoise_instances, list) {
        buffer = inst->tr->array_buffer.buffer;
        __timerlat_dump_stack(buffer, fstack, size);
    }
    rcu_read_unlock();
    preempt_enable_notrace();
}
#else /* CONFIG_STACKTRACE */
#define timerlat_dump_stack(latency) do {} while (0)
#define timerlat_save_stack(a) do {} while (0)
#endif /* CONFIG_STACKTRACE */
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * Macros to encapsulate the time capturing infrastructure.
 */
#define time_get()  trace_clock_local()
#define time_to_us(x)   div_u64(x, 1000)
#define time_sub(a, b)  ((a) - (b))

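/*
 * A minimal usage sketch of the helpers above (illustrative, not part of
 * the tracer's logic): measure how long a code section takes, in us.
 *
 *   u64 start, delta_us;
 *
 *   start = time_get();
 *   // ... section under measurement ...
 *   delta_us = time_to_us(time_sub(time_get(), start));
 *
 * This naive pattern does not discount interrupt noise; the
 * get_int_safe_duration()/set_int_safe_time() helpers below exist
 * precisely to make such measurements interference-aware.
 */
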
/*
 * cond_move_irq_delta_start - Forward the delta_start of a running IRQ
 *
 * If an IRQ is preempted by an NMI, its delta_start is pushed forward
 * to discount the NMI interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
    if (osn_var->irq.delta_start)
        osn_var->irq.delta_start += duration;
}

#ifndef CONFIG_PREEMPT_RT
/*
 * cond_move_softirq_delta_start - Forward the delta_start of a running softirq.
 *
 * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed
 * forward to discount the interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
    if (osn_var->softirq.delta_start)
        osn_var->softirq.delta_start += duration;
}
#else /* CONFIG_PREEMPT_RT */
#define cond_move_softirq_delta_start(osn_var, duration) do {} while (0)
#endif

/*
 * cond_move_thread_delta_start - Forward the delta_start of a running thread
 *
 * If a noisy thread is preempted by a softirq, IRQ or NMI, its delta_start
 * is pushed forward to discount the interference.
 *
 * See get_int_safe_duration().
 */
static inline void
cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration)
{
    if (osn_var->thread.delta_start)
        osn_var->thread.delta_start += duration;
}

/*
 * get_int_safe_duration - Get the duration of a window
 *
 * The irq, softirq and thread variables need to have their duration without
 * the interference from higher priority interrupts. Instead of keeping a
 * variable to discount the interrupt interference from these variables, the
 * starting time of these variables are pushed forward with the interrupt's
 * duration. In this way, a single variable is used to:
 *
 *   - Know if a given window is being measured.
 *   - Account its duration.
 *   - Discount the interference.
 *
 * To avoid getting inconsistent values, e.g.,:
 *
 *  now = time_get()
 *      --->    interrupt!
 *          delta_start -= int duration;
 *      <---
 *  duration = now - delta_start;
 *
 *  result: negative duration if the variable duration before the
 *  interrupt was smaller than the interrupt execution.
 *
 * A counter of interrupts is used. If the counter increased, try
 * to capture an interference safe duration.
 */
static inline s64
get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start)
{
    u64 int_counter, now;
    s64 duration;

    do {
        int_counter = local_read(&osn_var->int_counter);
        /* synchronize with interrupts */
        barrier();

        now = time_get();
        duration = (now - *delta_start);

        /* synchronize with interrupts */
        barrier();
    } while (int_counter != local_read(&osn_var->int_counter));

    /*
     * This is evidence of a race condition that causes
     * a value to be "discounted" too much.
     */
    if (duration < 0)
        osnoise_taint("Negative duration!\n");

    *delta_start = 0;

    return duration;
}

/*
 * set_int_safe_time - Save the current time on *time, aware of interference
 *
 * Get the time, taking into consideration a possible interference from
 * higher priority interrupts.
 *
 * See get_int_safe_duration() for an explanation.
 */
static u64
set_int_safe_time(struct osnoise_variables *osn_var, u64 *time)
{
    u64 int_counter;

    do {
        int_counter = local_read(&osn_var->int_counter);
        /* synchronize with interrupts */
        barrier();

        *time = time_get();

        /* synchronize with interrupts */
        barrier();
    } while (int_counter != local_read(&osn_var->int_counter));

    return int_counter;
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * copy_int_safe_time - Copy *src into *dst aware of interference
 */
static u64
copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src)
{
    u64 int_counter;

    do {
        int_counter = local_read(&osn_var->int_counter);
        /* synchronize with interrupts */
        barrier();

        *dst = *src;

        /* synchronize with interrupts */
        barrier();
    } while (int_counter != local_read(&osn_var->int_counter));

    return int_counter;
}
#endif /* CONFIG_TIMERLAT_TRACER */

/*
 * trace_osnoise_callback - NMI entry/exit callback
 *
 * This function is called at the entry and exit NMI code. The bool enter
 * distinguishes between either case. This function is used to note an NMI
 * occurrence, compute the noise caused by the NMI, and to remove the noise
 * it is potentially causing on other interference variables.
 */
void trace_osnoise_callback(bool enter)
{
    struct osnoise_variables *osn_var = this_cpu_osn_var();
    u64 duration;

    if (!osn_var->sampling)
        return;

    /*
     * Currently trace_clock_local() calls sched_clock() and the
     * generic version is not NMI safe.
     */
    if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
        if (enter) {
            osn_var->nmi.delta_start = time_get();
            local_inc(&osn_var->int_counter);
        } else {
            duration = time_get() - osn_var->nmi.delta_start;

            trace_nmi_noise(osn_var->nmi.delta_start, duration);

            cond_move_irq_delta_start(osn_var, duration);
            cond_move_softirq_delta_start(osn_var, duration);
            cond_move_thread_delta_start(osn_var, duration);
        }
    }

    if (enter)
        osn_var->nmi.count++;
}

/*
 * osnoise_trace_irq_entry - Note the starting of an IRQ
 *
 * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs,
 * it is safe to use a single variable (osn_var->irq) to save the statistics.
 * The arrival_time is used to report... the arrival time. The delta_start
 * is used to compute the duration at the IRQ exit handler. See
 * cond_move_irq_delta_start().
 */
void osnoise_trace_irq_entry(int id)
{
    struct osnoise_variables *osn_var = this_cpu_osn_var();

    if (!osn_var->sampling)
        return;
    /*
     * This value will be used in the report, but not to compute
     * the execution time, so it is safe to get it unsafe.
     */
    osn_var->irq.arrival_time = time_get();
    set_int_safe_time(osn_var, &osn_var->irq.delta_start);
    osn_var->irq.count++;

    local_inc(&osn_var->int_counter);
}

/*
 * osnoise_trace_irq_exit - Note the end of an IRQ, save data, and trace
 *
 * Computes the duration of the IRQ noise and traces it. Also discounts the
 * interference from other sources of noise that could currently be accounted.
 */
void osnoise_trace_irq_exit(int id, const char *desc)
{
    struct osnoise_variables *osn_var = this_cpu_osn_var();
    int duration;

    if (!osn_var->sampling)
        return;

    duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start);
    trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration);
    osn_var->irq.arrival_time = 0;
    cond_move_softirq_delta_start(osn_var, duration);
    cond_move_thread_delta_start(osn_var, duration);
}

/*
 * trace_irqentry_callback - Callback to the irq:irq_handler_entry trace event
 *
 * Used to note the starting of an IRQ occurrence.
 */
static void trace_irqentry_callback(void *data, int irq,
                    struct irqaction *action)
{
    osnoise_trace_irq_entry(irq);
}

/*
 * trace_irqexit_callback - Callback to the irq:irq_handler_exit trace event
 *
 * Used to note the end of an IRQ occurrence.
 */
static void trace_irqexit_callback(void *data, int irq,
                   struct irqaction *action, int ret)
{
    osnoise_trace_irq_exit(irq, action->name);
}

/*
 * arch specific register function.
 */
int __weak osnoise_arch_register(void)
{
    return 0;
}

/*
 * arch specific unregister function.
 */
void __weak osnoise_arch_unregister(void)
{
    return;
}

/*
 * hook_irq_events - Hook IRQ handling events
 *
 * This function hooks the IRQ related callbacks to the respective trace
 * events.
 */
static int hook_irq_events(void)
{
    int ret;

    ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL);
    if (ret)
        goto out_err;

    ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL);
    if (ret)
        goto out_unregister_entry;

    ret = osnoise_arch_register();
    if (ret)
        goto out_irq_exit;

    return 0;

out_irq_exit:
    unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
out_unregister_entry:
    unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
out_err:
    return -EINVAL;
}

/*
 * unhook_irq_events - Unhook IRQ handling events
 *
 * This function unhooks the IRQ related callbacks from the respective trace
 * events.
 */
static void unhook_irq_events(void)
{
    osnoise_arch_unregister();
    unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
    unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
}

#ifndef CONFIG_PREEMPT_RT
/*
 * trace_softirq_entry_callback - Note the starting of a softirq
 *
 * Save the starting time of a softirq. As softirqs are non-preemptive to
 * other softirqs, it is safe to use a single variable (osn_var->softirq)
 * to save the statistics. The arrival_time is used to report... the
 * arrival time. The delta_start is used to compute the duration at the
 * softirq exit handler. See cond_move_softirq_delta_start().
 */
static void trace_softirq_entry_callback(void *data, unsigned int vec_nr)
{
    struct osnoise_variables *osn_var = this_cpu_osn_var();

    if (!osn_var->sampling)
        return;
    /*
     * This value will be used in the report, but not to compute
     * the execution time, so it is safe to get it unsafe.
     */
    osn_var->softirq.arrival_time = time_get();
    set_int_safe_time(osn_var, &osn_var->softirq.delta_start);
    osn_var->softirq.count++;

    local_inc(&osn_var->int_counter);
}

/*
 * trace_softirq_exit_callback - Note the end of a softirq
 *
 * Computes the duration of the softirq noise and traces it. Also discounts
 * the interference from other sources of noise that could currently be
 * accounted.
 */
static void trace_softirq_exit_callback(void *data, unsigned int vec_nr)
{
    struct osnoise_variables *osn_var = this_cpu_osn_var();
    int duration;

    if (!osn_var->sampling)
        return;

    if (unlikely(timerlat_enabled()))
        if (!timerlat_softirq_exit(osn_var))
            return;

    duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start);
    trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration);
    cond_move_thread_delta_start(osn_var, duration);
    osn_var->softirq.arrival_time = 0;
}

/*
 * hook_softirq_events - Hook softirq handling events
 *
 * This function hooks the softirq related callbacks to the respective trace
 * events.
 */
static int hook_softirq_events(void)
{
    int ret;

    ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL);
    if (ret)
        goto out_err;

    ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL);
    if (ret)
        goto out_unreg_entry;

    return 0;

out_unreg_entry:
    unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
out_err:
    return -EINVAL;
}

/*
 * unhook_softirq_events - Unhook softirq handling events
 *
 * This function unhooks the softirq related callbacks from the respective
 * trace events.
 */
static void unhook_softirq_events(void)
{
    unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
    unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL);
}
#else /* CONFIG_PREEMPT_RT */
/*
 * softirqs run as threads on PREEMPT_RT.
 */
static int hook_softirq_events(void)
{
    return 0;
}
static void unhook_softirq_events(void)
{
}
#endif

/*
 * thread_entry - Record the starting of a thread noise window
 *
 * It saves the context switch time for a noisy thread, and increments
 * the interference counters.
 */
static void
thread_entry(struct osnoise_variables *osn_var, struct task_struct *t)
{
    if (!osn_var->sampling)
        return;
    /*
     * The arrival time will be used in the report, but not to compute
     * the execution time, so it is safe to get it unsafe.
     */
    osn_var->thread.arrival_time = time_get();

    set_int_safe_time(osn_var, &osn_var->thread.delta_start);

    osn_var->thread.count++;
    local_inc(&osn_var->int_counter);
}

/*
 * thread_exit - Report the end of a thread noise window
 *
 * It computes the total noise from a thread, tracing if needed.
 */
static void
thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
{
    int duration;

    if (!osn_var->sampling)
        return;

    if (unlikely(timerlat_enabled()))
        if (!timerlat_thread_exit(osn_var))
            return;

    duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start);

    trace_thread_noise(t, osn_var->thread.arrival_time, duration);

    osn_var->thread.arrival_time = 0;
}

/*
 * trace_sched_switch_callback - sched:sched_switch trace event handler
 *
 * This function is hooked to the sched:sched_switch trace event, and it is
 * used to record the beginning and to report the end of a thread noise window.
 */
static void
trace_sched_switch_callback(void *data, bool preempt,
                struct task_struct *p,
                struct task_struct *n,
                unsigned int prev_state)
{
    struct osnoise_variables *osn_var = this_cpu_osn_var();

    if (p->pid != osn_var->pid)
        thread_exit(osn_var, p);

    if (n->pid != osn_var->pid)
        thread_entry(osn_var, n);
}

/*
 * hook_thread_events - Hook the instrumentation for thread noise
 *
 * Hook the osnoise tracer callbacks to handle the noise from other
 * threads on the necessary kernel events.
 */
static int hook_thread_events(void)
{
    int ret;

    ret = register_trace_sched_switch(trace_sched_switch_callback, NULL);
    if (ret)
        return -EINVAL;

    return 0;
}

/*
 * unhook_thread_events - Unhook the instrumentation for thread noise
 *
 * Unhook the osnoise tracer callbacks that handle the noise from other
 * threads on the necessary kernel events.
 */
static void unhook_thread_events(void)
{
    unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
}

/*
 * save_osn_sample_stats - Save the osnoise_sample statistics
 *
 * Save the osnoise_sample statistics before the sampling phase. These
 * values will be used later to compute the diff between the statistics
 * before and after the osnoise sampling.
 */
static void
save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
{
    s->nmi_count = osn_var->nmi.count;
    s->irq_count = osn_var->irq.count;
    s->softirq_count = osn_var->softirq.count;
    s->thread_count = osn_var->thread.count;
}

/*
 * diff_osn_sample_stats - Compute the osnoise_sample statistics
 *
 * After a sample period, compute the difference on the osnoise_sample
 * statistics. The struct osnoise_sample *s contains the statistics saved via
 * save_osn_sample_stats() before the osnoise sampling.
 */
static void
diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
{
    s->nmi_count = osn_var->nmi.count - s->nmi_count;
    s->irq_count = osn_var->irq.count - s->irq_count;
    s->softirq_count = osn_var->softirq.count - s->softirq_count;
    s->thread_count = osn_var->thread.count - s->thread_count;
}

/*
 * osnoise_stop_tracing - Stop tracing and the tracer.
 */
static __always_inline void osnoise_stop_tracing(void)
{
    struct osnoise_instance *inst;
    struct trace_array *tr;

    rcu_read_lock();
    list_for_each_entry_rcu(inst, &osnoise_instances, list) {
        tr = inst->tr;
        trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
                "stop tracing hit on cpu %d\n", smp_processor_id());

        tracer_tracing_off(tr);
    }
    rcu_read_unlock();
}

/*
 * notify_new_max_latency - Notify a new max latency via fsnotify interface.
 */
static void notify_new_max_latency(u64 latency)
{
    struct osnoise_instance *inst;
    struct trace_array *tr;

    rcu_read_lock();
    list_for_each_entry_rcu(inst, &osnoise_instances, list) {
        tr = inst->tr;
        if (tr->max_latency < latency) {
            tr->max_latency = latency;
            latency_fsnotify(tr);
        }
    }
    rcu_read_unlock();
}

/*
 * run_osnoise - Sample the time and look for osnoise
 *
 * Used to capture the time, looking for potential osnoise latency repeatedly.
 * Unlike hwlat_detector, it is called with preemption and interrupts
 * enabled. This allows irqs, softirqs and threads to run, interfering with
 * the osnoise sampling thread, as they would do with a regular thread.
 */
static int run_osnoise(void)
{
    struct osnoise_variables *osn_var = this_cpu_osn_var();
    u64 start, sample, last_sample;
    u64 last_int_count, int_count;
    s64 noise = 0, max_noise = 0;
    s64 total, last_total = 0;
    struct osnoise_sample s;
    unsigned int threshold;
    u64 runtime, stop_in;
    u64 sum_noise = 0;
    int hw_count = 0;
    int ret = -1;

    /*
     * Considers the current thread as the workload.
     */
    osn_var->pid = current->pid;

    /*
     * Save the current stats for the diff.
     */
    save_osn_sample_stats(osn_var, &s);

    /*
     * If threshold is 0, use the default value of 5 us.
     */
    threshold = tracing_thresh ? : 5000;

    /*
     * Make sure NMIs see sampling first.
     */
    osn_var->sampling = true;
    barrier();

    /*
     * Transform the *_us config to nanoseconds to avoid the
     * division on the main loop.
     */
    runtime = osnoise_data.sample_runtime * NSEC_PER_USEC;
    stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC;

    /*
     * Start timestamp.
     */
    start = time_get();

    /*
     * "previous" loop.
     */
    last_int_count = set_int_safe_time(osn_var, &last_sample);

    do {
        /*
         * Get sample!
         */
        int_count = set_int_safe_time(osn_var, &sample);

        noise = time_sub(sample, last_sample);

        /*
         * This shouldn't happen.
         */
        if (noise < 0) {
            osnoise_taint("negative noise!");
            goto out;
        }

        /*
         * Sample runtime.
         */
        total = time_sub(sample, start);

        /*
         * Check for possible overflows.
         */
        if (total < last_total) {
            osnoise_taint("total overflow!");
            break;
        }

        last_total = total;

        if (noise >= threshold) {
            int interference = int_count - last_int_count;

            if (noise > max_noise)
                max_noise = noise;

            if (!interference)
                hw_count++;

            sum_noise += noise;

            trace_sample_threshold(last_sample, noise, interference);

            if (osnoise_data.stop_tracing)
                if (noise > stop_in)
                    osnoise_stop_tracing();
        }

        /*
         * In some cases, notably when running on a nohz_full CPU with
         * a stopped tick, PREEMPT_RCU has no way to account for QSs.
         * This will eventually cause unwarranted noise as PREEMPT_RCU
         * will force preemption as the means of ending the current
         * grace period. We avoid this problem by calling
         * rcu_momentary_dyntick_idle(), which performs a zero duration
         * EQS allowing PREEMPT_RCU to end the current grace period.
         * This call shouldn't be wrapped inside an RCU critical
         * section.
         *
         * Note that in non PREEMPT_RCU kernels QSs are handled through
         * cond_resched().
         */
        if (IS_ENABLED(CONFIG_PREEMPT_RCU)) {
            local_irq_disable();
            rcu_momentary_dyntick_idle();
            local_irq_enable();
        }

        /*
         * For the non-preemptive kernel config: let threads run, if
         * they so wish.
         */
        cond_resched();

        last_sample = sample;
        last_int_count = int_count;

    } while (total < runtime && !kthread_should_stop());

    /*
     * Finish the above in the view for interrupts.
     */
    barrier();

    osn_var->sampling = false;

    /*
     * Make sure sampling data is no longer updated.
     */
    barrier();

    /*
     * Save noise info.
     */
    s.noise = time_to_us(sum_noise);
    s.runtime = time_to_us(total);
    s.max_sample = time_to_us(max_noise);
    s.hw_count = hw_count;

    /* Save interference stats info */
    diff_osn_sample_stats(osn_var, &s);

    trace_osnoise_sample(&s);

    notify_new_max_latency(max_noise);

    if (osnoise_data.stop_tracing_total)
        if (s.noise > osnoise_data.stop_tracing_total)
            osnoise_stop_tracing();

    return 0;
out:
    return ret;
}
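
/*
 * A worked example of the accounting above (a sketch with made-up
 * numbers): with the default 1s runtime and the default 5 us threshold,
 * suppose two consecutive time reads land 120 us apart while int_counter
 * did not move. That 120 us gap counts as a single noise sample with no
 * known interference, so hw_count is incremented and 120 us is added to
 * sum_noise. If sum_noise ends at 1000 us over a 1000000 us runtime, the
 * report shows 1000 us of noise, i.e. 99.9000% of CPU available.
 */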

static struct cpumask osnoise_cpumask;
static struct cpumask save_cpumask;

/*
 * osnoise_sleep - sleep until the next period
 */
static void osnoise_sleep(void)
{
    u64 interval;
    ktime_t wake_time;

    mutex_lock(&interface_lock);
    interval = osnoise_data.sample_period - osnoise_data.sample_runtime;
    mutex_unlock(&interface_lock);

    /*
     * Differently from hwlat_detector, the osnoise tracer can run
     * without a pause because preemption is on.
     */
    if (!interval) {
        /* Let synchronize_rcu_tasks() make progress */
        cond_resched_tasks_rcu_qs();
        return;
    }

    wake_time = ktime_add_us(ktime_get(), interval);
    __set_current_state(TASK_INTERRUPTIBLE);

    while (schedule_hrtimeout_range(&wake_time, 0, HRTIMER_MODE_ABS)) {
        if (kthread_should_stop())
            break;
    }
}

/*
 * osnoise_main - The osnoise detection kernel thread
 *
 * Calls run_osnoise() function to measure the osnoise for the configured runtime,
 * every period.
 */
static int osnoise_main(void *data)
{
    while (!kthread_should_stop()) {
        run_osnoise();
        osnoise_sleep();
    }

    return 0;
}

#ifdef CONFIG_TIMERLAT_TRACER
/*
 * timerlat_irq - hrtimer handler for timerlat.
 */
static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
{
    struct osnoise_variables *osn_var = this_cpu_osn_var();
    struct timerlat_variables *tlat;
    struct timerlat_sample s;
    u64 now;
    u64 diff;

    /*
     * I am not sure if the timer was armed for this CPU. So, get
     * the timerlat struct from the timer itself, not from this
     * CPU.
     */
    tlat = container_of(timer, struct timerlat_variables, timer);

    now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));

    /*
     * Enable the osnoise: events for thread and softirq.
     */
    tlat->tracing_thread = true;

    osn_var->thread.arrival_time = time_get();

    /*
     * A hardirq is running: the timer IRQ. It is for sure preempting
     * a thread, and potentially preempting a softirq.
     *
     * At this point, it is not interesting to know the duration of the
     * preempted thread (and maybe softirq), but how much time they will
     * delay the beginning of the execution of the timer thread.
     *
     * To get the correct (net) delay added by the softirq, its delta_start
     * is set as the IRQ one. In this way, at the return of the IRQ, the delta
     * start of the softirq will be zeroed, accounting then only the time
     * after that.
     *
     * The thread follows the same principle. However, if a softirq is
     * running, the thread needs to receive the softirq delta_start. The
     * reason is that the softirq will be the last to be unfolded,
     * resetting the thread delay to zero.
     *
     * The PREEMPT_RT is a special case, though. As softirqs run as threads
     * on RT, moving the thread is enough.
     */
    if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) {
        copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
                   &osn_var->softirq.delta_start);

        copy_int_safe_time(osn_var, &osn_var->softirq.delta_start,
                    &osn_var->irq.delta_start);
    } else {
        copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
                    &osn_var->irq.delta_start);
    }

    /*
     * Compare the current time with the expected time.
     */
    diff = now - tlat->abs_period;

    tlat->count++;
    s.seqnum = tlat->count;
    s.timer_latency = diff;
    s.context = IRQ_CONTEXT;

    trace_timerlat_sample(&s);

    if (osnoise_data.stop_tracing) {
        if (time_to_us(diff) >= osnoise_data.stop_tracing) {

            /*
             * At this point, if stop_tracing is set and <= print_stack,
             * the stack trace would be printed by the thread handler,
             * which will not run because tracing stops here.
             *
             * Thus, print the stack trace now, as it is helpful to find
             * the root cause of an IRQ latency.
             */
            if (osnoise_data.stop_tracing <= osnoise_data.print_stack) {
                timerlat_save_stack(0);
                timerlat_dump_stack(time_to_us(diff));
            }

            osnoise_stop_tracing();
            notify_new_max_latency(diff);

            return HRTIMER_NORESTART;
        }
    }

    wake_up_process(tlat->kthread);

    if (osnoise_data.print_stack)
        timerlat_save_stack(0);

    return HRTIMER_NORESTART;
}
1610 
1611 /*
1612  * wait_next_period - Wait for the next period for timerlat
1613  */
1614 static int wait_next_period(struct timerlat_variables *tlat)
1615 {
1616     ktime_t next_abs_period, now;
1617     u64 rel_period = osnoise_data.timerlat_period * 1000;
1618 
1619     now = hrtimer_cb_get_time(&tlat->timer);
1620     next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
1621 
1622     /*
1623      * Save the next abs_period.
1624      */
1625     tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
1626 
1627     /*
1628      * If the new abs_period is in the past, skip the activation.
1629      */
1630     while (ktime_compare(now, next_abs_period) > 0) {
1631         next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
1632         tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
1633     }
1634 
1635     set_current_state(TASK_INTERRUPTIBLE);
1636 
1637     hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD);
1638     schedule();
1639     return 1;
1640 }
1641 
1642 /*
1643  * timerlat_main- Timerlat main
1644  */
1645 static int timerlat_main(void *data)
1646 {
1647     struct osnoise_variables *osn_var = this_cpu_osn_var();
1648     struct timerlat_variables *tlat = this_cpu_tmr_var();
1649     struct timerlat_sample s;
1650     struct sched_param sp;
1651     u64 now, diff;
1652 
1653     /*
1654      * Make the thread RT, that is how cyclictest is usually used.
1655      */
1656     sp.sched_priority = DEFAULT_TIMERLAT_PRIO;
1657     sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1658 
1659     tlat->count = 0;
1660     tlat->tracing_thread = false;
1661 
1662     hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
1663     tlat->timer.function = timerlat_irq;
1664     tlat->kthread = current;
1665     osn_var->pid = current->pid;
1666     /*
1667      * Anotate the arrival time.
1668      */
1669     tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);
1670 
1671     wait_next_period(tlat);
1672 
1673     osn_var->sampling = 1;
1674 
1675     while (!kthread_should_stop()) {
1676         now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
1677         diff = now - tlat->abs_period;
1678 
1679         s.seqnum = tlat->count;
1680         s.timer_latency = diff;
1681         s.context = THREAD_CONTEXT;
1682 
1683         trace_timerlat_sample(&s);
1684 
1685         timerlat_dump_stack(time_to_us(diff));
1686 
1687         tlat->tracing_thread = false;
1688         if (osnoise_data.stop_tracing_total)
1689             if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
1690                 osnoise_stop_tracing();
1691 
1692         wait_next_period(tlat);
1693     }
1694 
1695     hrtimer_cancel(&tlat->timer);
1696     return 0;
1697 }
1698 #else /* CONFIG_TIMERLAT_TRACER */
1699 static int timerlat_main(void *data)
1700 {
1701     return 0;
1702 }
1703 #endif /* CONFIG_TIMERLAT_TRACER */
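/*
 * A typical way to exercise the thread implemented above, assuming
 * tracefs is mounted at /sys/kernel/tracing (an illustrative session,
 * not part of the source):
 *
 *   # cd /sys/kernel/tracing
 *   # echo timerlat > current_tracer
 *   # cat trace
 *
 * Each timer activation emits two timerlat_sample events sharing a
 * seqnum: one from the timer IRQ context and one from the thread
 * context above, each carrying the latency measured in that context.
 */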
1704 
1705 /*
1706  * stop_kthread - stop a workload thread
1707  */
1708 static void stop_kthread(unsigned int cpu)
1709 {
1710     struct task_struct *kthread;
1711 
1712     kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
1713     if (kthread)
1714         kthread_stop(kthread);
1715     per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
1716 }
1717 
1718 /*
1719  * stop_per_cpu_kthreads - Stop per-cpu threads
1720  *
1721  * Stop the osnoise sampling threads. Use this on unload and at system
1722  * shutdown.
1723  */
1724 static void stop_per_cpu_kthreads(void)
1725 {
1726     int cpu;
1727 
1728     cpus_read_lock();
1729 
1730     for_each_online_cpu(cpu)
1731         stop_kthread(cpu);
1732 
1733     cpus_read_unlock();
1734 }
1735 
1736 /*
1737  * start_kthread - Start a workload thread
1738  */
1739 static int start_kthread(unsigned int cpu)
1740 {
1741     struct task_struct *kthread;
1742     void *main = osnoise_main;
1743     char comm[24];
1744 
1745     if (timerlat_enabled()) {
1746         snprintf(comm, 24, "timerlat/%d", cpu);
1747         main = timerlat_main;
1748     } else {
1749         snprintf(comm, 24, "osnoise/%d", cpu);
1750     }
1751 
1752     kthread = kthread_run_on_cpu(main, NULL, cpu, comm);
1753 
1754     if (IS_ERR(kthread)) {
1755         pr_err(BANNER "could not start sampling thread\n");
1756         stop_per_cpu_kthreads();
1757         return -ENOMEM;
1758     }
1759 
1760     per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread;
1761 
1762     return 0;
1763 }
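/*
 * The comm built above is what the sampling threads report to the
 * process list; e.g., with the osnoise workload running on CPUs 0-1,
 * one would expect something like this (illustrative PIDs):
 *
 *   # ps -e -o pid,comm | grep -E 'osnoise|timerlat'
 *    1234 osnoise/0
 *    1235 osnoise/1
 */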
1764 
1765 /*
1766  * start_per_cpu_kthreads - Kick off the per-cpu osnoise sampling kthreads
1767  *
1768  * This starts a kernel thread to look for osnoise on each CPU in which
1769  * the tracer is allowed to run.
1770  */
1771 static int start_per_cpu_kthreads(void)
1772 {
1773     struct cpumask *current_mask = &save_cpumask;
1774     int retval = 0;
1775     int cpu;
1776 
1777     cpus_read_lock();
1778     /*
1779      * Run only on online CPUs in which osnoise is allowed to run.
1780      */
1781     cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask);
1782 
1783     for_each_possible_cpu(cpu)
1784         per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
1785 
1786     for_each_cpu(cpu, current_mask) {
1787         retval = start_kthread(cpu);
1788         if (retval) {
1789             stop_per_cpu_kthreads();
1790             break;
1791         }
1792     }
1793 
1794     cpus_read_unlock();
1795 
1796     return retval;
1797 }
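/*
 * A small worked example of the cpumask_and() above, with hypothetical
 * masks: if cpu_online_mask = 0-7 and osnoise_cpumask = 2-5,9, then
 * current_mask = 2-5, so sampling threads start only on CPUs 2-5;
 * CPU 9 is skipped because it is not online.
 */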
1798 
1799 #ifdef CONFIG_HOTPLUG_CPU
1800 static void osnoise_hotplug_workfn(struct work_struct *dummy)
1801 {
1802     unsigned int cpu = smp_processor_id();
1803 
1804     mutex_lock(&trace_types_lock);
1805 
1806     if (!osnoise_has_registered_instances())
1807         goto out_unlock_trace;
1808 
1809     mutex_lock(&interface_lock);
1810     cpus_read_lock();
1811 
1812     if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
1813         goto out_unlock;
1814 
1815     start_kthread(cpu);
1816 
1817 out_unlock:
1818     cpus_read_unlock();
1819     mutex_unlock(&interface_lock);
1820 out_unlock_trace:
1821     mutex_unlock(&trace_types_lock);
1822 }
1823 
1824 static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn);
1825 
1826 /*
1827  * osnoise_cpu_init - CPU hotplug online callback function
1828  */
1829 static int osnoise_cpu_init(unsigned int cpu)
1830 {
1831     schedule_work_on(cpu, &osnoise_hotplug_work);
1832     return 0;
1833 }
1834 
1835 /*
1836  * osnoise_cpu_die - CPU hotplug offline callback function
1837  */
1838 static int osnoise_cpu_die(unsigned int cpu)
1839 {
1840     stop_kthread(cpu);
1841     return 0;
1842 }
1843 
1844 static void osnoise_init_hotplug_support(void)
1845 {
1846     int ret;
1847 
1848     ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online",
1849                 osnoise_cpu_init, osnoise_cpu_die);
1850     if (ret < 0)
1851         pr_warn(BANNER "Failed to initialize cpu hotplug support\n");
1852 
1853     return;
1854 }
1855 #else /* CONFIG_HOTPLUG_CPU */
1856 static void osnoise_init_hotplug_support(void)
1857 {
1858     return;
1859 }
1860 #endif /* CONFIG_HOTPLUG_CPU */
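/*
 * A sketch of the hotplug interplay above, for a hypothetical
 * sequence: with a tracer instance registered and osnoise_cpumask =
 * 0-3, offlining CPU 2 invokes osnoise_cpu_die() and stops its
 * kthread; onlining CPU 2 again schedules osnoise_hotplug_work on it,
 * which restarts the kthread because CPU 2 is still both online and
 * present in osnoise_cpumask.
 */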
1861 
1862 /*
1863  * osnoise_cpus_read - Read function for reading the "cpus" file
1864  * @filp: The active open file structure
1865  * @ubuf: The userspace provided buffer to read value into
1866  * @count: The maximum number of bytes to read
1867  * @ppos: The current "file" position
1868  *
1869  * Prints the "cpus" output into the user-provided buffer.
1870  */
1871 static ssize_t
1872 osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count,
1873           loff_t *ppos)
1874 {
1875     char *mask_str;
1876     int len;
1877 
1878     mutex_lock(&interface_lock);
1879 
1880     len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1;
1881     mask_str = kmalloc(len, GFP_KERNEL);
1882     if (!mask_str) {
1883         count = -ENOMEM;
1884         goto out_unlock;
1885     }
1886 
1887     len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask));
1888     if (len >= count) {
1889         count = -EINVAL;
1890         goto out_free;
1891     }
1892 
1893     count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
1894 
1895 out_free:
1896     kfree(mask_str);
1897 out_unlock:
1898     mutex_unlock(&interface_lock);
1899 
1900     return count;
1901 }
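/*
 * Reading the "cpus" file returns the mask in cpulist format; e.g.,
 * on an 8-CPU system still using the default mask, one would expect
 * (illustrative output):
 *
 *   # cat /sys/kernel/tracing/osnoise/cpus
 *   0-7
 */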
1902 
1903 /*
1904  * osnoise_cpus_write - Write function for "cpus" entry
1905  * @filp: The active open file structure
1906  * @ubuf: The user buffer that contains the value to write
1907  * @count: The maximum number of bytes to write to "file"
1908  * @ppos: The current position in the file
1909  *
1910  * This function provides a write implementation for the "cpus"
1911  * interface to the osnoise tracer. By default, the mask includes all
1912  * CPUs, allowing osnoise threads to run on any online CPU of the
1913  * system. Writing a CPU list via this interface restricts osnoise to
1914  * that set of CPUs. Why not use "tracing_cpumask"? Because the user
1915  * might be interested in tracing what is running on other CPUs. For
1916  * instance, one might run osnoise in one HT CPU while observing what
1917  * is running on the sibling HT CPU.
1918  */
1919 static ssize_t
1920 osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
1921            loff_t *ppos)
1922 {
1923     cpumask_var_t osnoise_cpumask_new;
1924     int running, err;
1925     char buf[256];
1926 
1927     if (count >= 256)
1928         return -EINVAL;
1929 
1930     if (copy_from_user(buf, ubuf, count))
1931         return -EFAULT;
1932 
1933     if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL))
1934         return -ENOMEM;
1935 
1936     err = cpulist_parse(buf, osnoise_cpumask_new);
1937     if (err)
1938         goto err_free;
1939 
1940     /*
1941      * trace_types_lock is taken to avoid concurrency on start/stop.
1942      */
1943     mutex_lock(&trace_types_lock);
1944     running = osnoise_has_registered_instances();
1945     if (running)
1946         stop_per_cpu_kthreads();
1947 
1948     mutex_lock(&interface_lock);
1949     /*
1950      * osnoise_cpumask is read by CPU hotplug operations.
1951      */
1952     cpus_read_lock();
1953 
1954     cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new);
1955 
1956     cpus_read_unlock();
1957     mutex_unlock(&interface_lock);
1958 
1959     if (running)
1960         start_per_cpu_kthreads();
1961     mutex_unlock(&trace_types_lock);
1962 
1963     free_cpumask_var(osnoise_cpumask_new);
1964     return count;
1965 
1966 err_free:
1967     free_cpumask_var(osnoise_cpumask_new);
1968 
1969     return err;
1970 }
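/*
 * Example of restricting the workload to CPUs 0-3 via the interface
 * above (illustrative; any cpulist-format string shorter than 256
 * bytes is accepted):
 *
 *   # echo 0-3 > /sys/kernel/tracing/osnoise/cpus
 *
 * If instances are registered, the per-cpu kthreads are stopped and
 * restarted so that the new mask takes effect immediately.
 */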
1971 
1972 /*
1973  * osnoise/runtime_us: cannot be greater than the period.
1974  */
1975 static struct trace_min_max_param osnoise_runtime = {
1976     .lock   = &interface_lock,
1977     .val    = &osnoise_data.sample_runtime,
1978     .max    = &osnoise_data.sample_period,
1979     .min    = NULL,
1980 };
1981 
1982 /*
1983  * osnoise/period_us: cannot be smaller than the runtime.
1984  */
1985 static struct trace_min_max_param osnoise_period = {
1986     .lock   = &interface_lock,
1987     .val    = &osnoise_data.sample_period,
1988     .max    = NULL,
1989     .min    = &osnoise_data.sample_runtime,
1990 };
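/*
 * Because runtime_us is capped at period_us and period_us cannot drop
 * below runtime_us, a sequence like the following behaves as shown
 * (illustrative values, starting from the 1s/1s defaults):
 *
 *   # echo 500000 > osnoise/runtime_us    <- accepted, 0.5s <= 1s period
 *   # echo 400000 > osnoise/period_us     <- rejected, below the runtime
 */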
1991 
1992 /*
1993  * osnoise/stop_tracing_us: no limit.
1994  */
1995 static struct trace_min_max_param osnoise_stop_tracing_in = {
1996     .lock   = &interface_lock,
1997     .val    = &osnoise_data.stop_tracing,
1998     .max    = NULL,
1999     .min    = NULL,
2000 };
2001 
2002 /*
2003  * osnoise/stop_tracing_total_us: no limit.
2004  */
2005 static struct trace_min_max_param osnoise_stop_tracing_total = {
2006     .lock   = &interface_lock,
2007     .val    = &osnoise_data.stop_tracing_total,
2008     .max    = NULL,
2009     .min    = NULL,
2010 };
2011 
2012 #ifdef CONFIG_TIMERLAT_TRACER
2013 /*
2014  * osnoise/print_stack: print the stacktrace of the IRQ handler if the total
2015  * latency is higher than the configured value.
2016  */
2017 static struct trace_min_max_param osnoise_print_stack = {
2018     .lock   = &interface_lock,
2019     .val    = &osnoise_data.print_stack,
2020     .max    = NULL,
2021     .min    = NULL,
2022 };
2023 
2024 /*
2025  * osnoise/timerlat_period: min 100 us, max 1 s
2026  */
2027 u64 timerlat_min_period = 100;
2028 u64 timerlat_max_period = 1000000;
2029 static struct trace_min_max_param timerlat_period = {
2030     .lock   = &interface_lock,
2031     .val    = &osnoise_data.timerlat_period,
2032     .max    = &timerlat_max_period,
2033     .min    = &timerlat_min_period,
2034 };
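/*
 * Given the min/max above, timerlat_period_us is clamped to the range
 * [100, 1000000] us; e.g. (illustrative), writing 50 to
 * osnoise/timerlat_period_us would be rejected, while writing 500
 * sets a 500 us activation period.
 */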
2035 #endif
2036 
2037 static const struct file_operations cpus_fops = {
2038     .open       = tracing_open_generic,
2039     .read       = osnoise_cpus_read,
2040     .write      = osnoise_cpus_write,
2041     .llseek     = generic_file_llseek,
2042 };
2043 
2044 #ifdef CONFIG_TIMERLAT_TRACER
2045 #ifdef CONFIG_STACKTRACE
2046 static int init_timerlat_stack_tracefs(struct dentry *top_dir)
2047 {
2048     struct dentry *tmp;
2049 
2050     tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir,
2051                   &osnoise_print_stack, &trace_min_max_fops);
2052     if (!tmp)
2053         return -ENOMEM;
2054 
2055     return 0;
2056 }
2057 #else /* CONFIG_STACKTRACE */
2058 static int init_timerlat_stack_tracefs(struct dentry *top_dir)
2059 {
2060     return 0;
2061 }
2062 #endif /* CONFIG_STACKTRACE */
2063 
2064 /*
2065  * init_timerlat_tracefs - A function to initialize the timerlat interface files
2066  */
2067 static int init_timerlat_tracefs(struct dentry *top_dir)
2068 {
2069     struct dentry *tmp;
2070 
2071     tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir,
2072                   &timerlat_period, &trace_min_max_fops);
2073     if (!tmp)
2074         return -ENOMEM;
2075 
2076     return init_timerlat_stack_tracefs(top_dir);
2077 }
2078 #else /* CONFIG_TIMERLAT_TRACER */
2079 static int init_timerlat_tracefs(struct dentry *top_dir)
2080 {
2081     return 0;
2082 }
2083 #endif /* CONFIG_TIMERLAT_TRACER */
2084 
2085 /*
2086  * init_tracefs - A function to initialize the tracefs interface files
2087  *
2088  * This function creates entries in tracefs for "osnoise" and "timerlat".
2089  * It creates these directories in the tracing directory, and within that
2090  * directory the user can change and view the configs.
2091  */
2092 static int init_tracefs(void)
2093 {
2094     struct dentry *top_dir;
2095     struct dentry *tmp;
2096     int ret;
2097 
2098     ret = tracing_init_dentry();
2099     if (ret)
2100         return -ENOMEM;
2101 
2102     top_dir = tracefs_create_dir("osnoise", NULL);
2103     if (!top_dir)
2104         return 0;
2105 
2106     tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir,
2107                   &osnoise_period, &trace_min_max_fops);
2108     if (!tmp)
2109         goto err;
2110 
2111     tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir,
2112                   &osnoise_runtime, &trace_min_max_fops);
2113     if (!tmp)
2114         goto err;
2115 
2116     tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir,
2117                   &osnoise_stop_tracing_in, &trace_min_max_fops);
2118     if (!tmp)
2119         goto err;
2120 
2121     tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir,
2122                   &osnoise_stop_tracing_total, &trace_min_max_fops);
2123     if (!tmp)
2124         goto err;
2125 
2126     tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops);
2127     if (!tmp)
2128         goto err;
2129 
2130     ret = init_timerlat_tracefs(top_dir);
2131     if (ret)
2132         goto err;
2133 
2134     return 0;
2135 
2136 err:
2137     tracefs_remove(top_dir);
2138     return -ENOMEM;
2139 }
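/*
 * With the code above in place, the resulting tracefs layout is the
 * following (assuming CONFIG_TIMERLAT_TRACER and CONFIG_STACKTRACE):
 *
 *   /sys/kernel/tracing/osnoise/
 *   |-- cpus
 *   |-- period_us
 *   |-- runtime_us
 *   |-- stop_tracing_us
 *   |-- stop_tracing_total_us
 *   |-- timerlat_period_us
 *   `-- print_stack
 */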
2140 
2141 static int osnoise_hook_events(void)
2142 {
2143     int retval;
2144 
2145     /*
2146      * Trace is already hooked; we are re-enabling it after
2147      * a stop_tracing_*.
2148      */
2149     if (trace_osnoise_callback_enabled)
2150         return 0;
2151 
2152     retval = hook_irq_events();
2153     if (retval)
2154         return -EINVAL;
2155 
2156     retval = hook_softirq_events();
2157     if (retval)
2158         goto out_unhook_irq;
2159 
2160     retval = hook_thread_events();
2161     /*
2162      * All fine!
2163      */
2164     if (!retval)
2165         return 0;
2166 
2167     unhook_softirq_events();
2168 out_unhook_irq:
2169     unhook_irq_events();
2170     return -EINVAL;
2171 }
2172 
2173 static void osnoise_unhook_events(void)
2174 {
2175     unhook_thread_events();
2176     unhook_softirq_events();
2177     unhook_irq_events();
2178 }
2179 
2180 /*
2181  * osnoise_workload_start - start the workload and hook to events
2182  */
2183 static int osnoise_workload_start(void)
2184 {
2185     int retval;
2186 
2187     /*
2188      * Instances need to be registered after calling workload
2189      * start. Hence, if there is already a registered instance,
2190      * the workload was already started. Otherwise, this
2191      * code is on the way to register the first instance,
2192      * and the workload will start.
2193      */
2194     if (osnoise_has_registered_instances())
2195         return 0;
2196 
2197     osn_var_reset_all();
2198 
2199     retval = osnoise_hook_events();
2200     if (retval)
2201         return retval;
2202 
2203     /*
2204      * Make sure that ftrace_nmi_enter/exit() see reset values
2205      * before enabling trace_osnoise_callback_enabled.
2206      */
2207     barrier();
2208     trace_osnoise_callback_enabled = true;
2209 
2210     retval = start_per_cpu_kthreads();
2211     if (retval) {
2212         trace_osnoise_callback_enabled = false;
2213         /*
2214          * Make sure that ftrace_nmi_enter/exit() see
2215          * trace_osnoise_callback_enabled as false before continuing.
2216          */
2217         barrier();
2218 
2219         osnoise_unhook_events();
2220         return retval;
2221     }
2222 
2223     return 0;
2224 }
2225 
2226 /*
2227  * osnoise_workload_stop - stop the workload and unhook the events
2228  */
2229 static void osnoise_workload_stop(void)
2230 {
2231     /*
2232      * Instances need to be unregistered before calling
2233      * stop. Hence, if an instance is still registered, another
2234      * instance is still using the workload, and it must not
2235      * yet stop. Otherwise, this code is on the way to disable
2236      * the last instance, and the workload can stop.
2237      */
2238     if (osnoise_has_registered_instances())
2239         return;
2240 
2241     /*
2242      * If callbacks were already disabled in a previous stop
2243      * call, there is no need to disable them again.
2244      *
2245      * For instance, this happens when tracing is stopped via:
2246      * echo 0 > tracing_on
2247      * echo nop > current_tracer.
2248      */
2249     if (!trace_osnoise_callback_enabled)
2250         return;
2251 
2252     trace_osnoise_callback_enabled = false;
2253     /*
2254      * Make sure that ftrace_nmi_enter/exit() see
2255      * trace_osnoise_callback_enabled as false before continuing.
2256      */
2257     barrier();
2258 
2259     stop_per_cpu_kthreads();
2260 
2261     osnoise_unhook_events();
2262 }
2263 
2264 static void osnoise_tracer_start(struct trace_array *tr)
2265 {
2266     int retval;
2267 
2268     /*
2269      * If the instance is already registered, there is no need to
2270      * register it again.
2271      */
2272     if (osnoise_instance_registered(tr))
2273         return;
2274 
2275     retval = osnoise_workload_start();
2276     if (retval)
2277         pr_err(BANNER "Error starting osnoise tracer\n");
2278 
2279     osnoise_register_instance(tr);
2280 }
2281 
2282 static void osnoise_tracer_stop(struct trace_array *tr)
2283 {
2284     osnoise_unregister_instance(tr);
2285     osnoise_workload_stop();
2286 }
2287 
2288 static int osnoise_tracer_init(struct trace_array *tr)
2289 {
2290     /*
2291      * Only allow osnoise tracer if timerlat tracer is not running
2292      * already.
2293      */
2294     if (timerlat_enabled())
2295         return -EBUSY;
2296 
2297     tr->max_latency = 0;
2298 
2299     osnoise_tracer_start(tr);
2300     return 0;
2301 }
2302 
2303 static void osnoise_tracer_reset(struct trace_array *tr)
2304 {
2305     osnoise_tracer_stop(tr);
2306 }
2307 
2308 static struct tracer osnoise_tracer __read_mostly = {
2309     .name       = "osnoise",
2310     .init       = osnoise_tracer_init,
2311     .reset      = osnoise_tracer_reset,
2312     .start      = osnoise_tracer_start,
2313     .stop       = osnoise_tracer_stop,
2314     .print_header   = print_osnoise_headers,
2315     .allow_instances = true,
2316 };
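/*
 * Enabling the tracer defined above, in an illustrative session:
 *
 *   # echo osnoise > /sys/kernel/tracing/current_tracer
 *   # cat /sys/kernel/tracing/trace
 *
 * Because .allow_instances is set, the same can be done from a trace
 * instance under /sys/kernel/tracing/instances/.
 */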
2317 
2318 #ifdef CONFIG_TIMERLAT_TRACER
2319 static void timerlat_tracer_start(struct trace_array *tr)
2320 {
2321     int retval;
2322 
2323     /*
2324      * If the instance is already registered, there is no need to
2325      * register it again.
2326      */
2327     if (osnoise_instance_registered(tr))
2328         return;
2329 
2330     retval = osnoise_workload_start();
2331     if (retval)
2332         pr_err(BANNER "Error starting timerlat tracer\n");
2333 
2334     osnoise_register_instance(tr);
2335 
2336     return;
2337 }
2338 
2339 static void timerlat_tracer_stop(struct trace_array *tr)
2340 {
2341     int cpu;
2342 
2343     osnoise_unregister_instance(tr);
2344 
2345     /*
2346      * Instruct the threads to stop only if this is the last instance.
2347      */
2348     if (!osnoise_has_registered_instances()) {
2349         for_each_online_cpu(cpu)
2350             per_cpu(per_cpu_osnoise_var, cpu).sampling = 0;
2351     }
2352 
2353     osnoise_workload_stop();
2354 }
2355 
2356 static int timerlat_tracer_init(struct trace_array *tr)
2357 {
2358     /*
2359      * Only allow timerlat tracer if osnoise tracer is not running already.
2360      */
2361     if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer)
2362         return -EBUSY;
2363 
2364     /*
2365      * If this is the first instance, set timerlat_tracer to block
2366      * osnoise tracer start.
2367      */
2368     if (!osnoise_has_registered_instances())
2369         osnoise_data.timerlat_tracer = 1;
2370 
2371     tr->max_latency = 0;
2372     timerlat_tracer_start(tr);
2373 
2374     return 0;
2375 }
2376 
2377 static void timerlat_tracer_reset(struct trace_array *tr)
2378 {
2379     timerlat_tracer_stop(tr);
2380 
2381     /*
2382      * If this is the last instance, reset timerlat_tracer allowing
2383      * osnoise to be started.
2384      */
2385     if (!osnoise_has_registered_instances())
2386         osnoise_data.timerlat_tracer = 0;
2387 }
2388 
2389 static struct tracer timerlat_tracer __read_mostly = {
2390     .name       = "timerlat",
2391     .init       = timerlat_tracer_init,
2392     .reset      = timerlat_tracer_reset,
2393     .start      = timerlat_tracer_start,
2394     .stop       = timerlat_tracer_stop,
2395     .print_header   = print_timerlat_headers,
2396     .allow_instances = true,
2397 };
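/*
 * A typical debugging session combining the tracer above with the
 * stop/print knobs (illustrative thresholds, in microseconds):
 *
 *   # echo 250 > /sys/kernel/tracing/osnoise/print_stack
 *   # echo 500 > /sys/kernel/tracing/osnoise/stop_tracing_total_us
 *   # echo timerlat > /sys/kernel/tracing/current_tracer
 *
 * Tracing then stops on the first thread-context latency >= 500 us,
 * and the saved timer IRQ stack is printed for latencies above the
 * 250 us print_stack threshold.
 */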
2398 
2399 __init static int init_timerlat_tracer(void)
2400 {
2401     return register_tracer(&timerlat_tracer);
2402 }
2403 #else /* CONFIG_TIMERLAT_TRACER */
2404 __init static int init_timerlat_tracer(void)
2405 {
2406     return 0;
2407 }
2408 #endif /* CONFIG_TIMERLAT_TRACER */
2409 
2410 __init static int init_osnoise_tracer(void)
2411 {
2412     int ret;
2413 
2414     mutex_init(&interface_lock);
2415 
2416     cpumask_copy(&osnoise_cpumask, cpu_all_mask);
2417 
2418     ret = register_tracer(&osnoise_tracer);
2419     if (ret) {
2420         pr_err(BANNER "Error registering osnoise!\n");
2421         return ret;
2422     }
2423 
2424     ret = init_timerlat_tracer();
2425     if (ret) {
2426         pr_err(BANNER "Error registering timerlat!\n");
2427         return ret;
2428     }
2429 
2430     osnoise_init_hotplug_support();
2431 
2432     INIT_LIST_HEAD_RCU(&osnoise_instances);
2433 
2434     init_tracefs();
2435 
2436     return 0;
2437 }
2438 late_initcall(init_osnoise_tracer);