kernel/irq/timings.c

0001 // SPDX-License-Identifier: GPL-2.0
0002 // Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
0003 #define pr_fmt(fmt) "irq_timings: " fmt
0004
0005 #include <linux/kernel.h>
0006 #include <linux/percpu.h>
0007 #include <linux/slab.h>
0008 #include <linux/static_key.h>
0009 #include <linux/init.h>
0010 #include <linux/interrupt.h>
0011 #include <linux/idr.h>
0012 #include <linux/irq.h>
0013 #include <linux/math64.h>
0014 #include <linux/log2.h>
0015
0016 #include <trace/events/irq.h>
0017
0018 #include "internals.h"
0019
0020 DEFINE_STATIC_KEY_FALSE(irq_timing_enabled);
0021
0022 DEFINE_PER_CPU(struct irq_timings, irq_timings);
0023
0024 static DEFINE_IDR(irqt_stats);
0025
0026 void irq_timings_enable(void)
0027 {
0028     static_branch_enable(&irq_timing_enabled);
0029 }
0030
0031 void irq_timings_disable(void)
0032 {
0033     static_branch_disable(&irq_timing_enabled);
0034 }
0035
0036 /*
0037  * The main goal of this algorithm is to predict the next interrupt
0038  * occurrence on the current CPU.
0039  *
0040  * Currently, the interrupt timings are stored in a circular array
0041  * buffer every time there is an interrupt, as a tuple: the interrupt
0042  * number and the associated timestamp when the event occurred <irq,
0043  * timestamp>.
0044  *
0045  * For every interrupt occurring in a short period of time, we can
0046  * measure the elapsed time between the occurrences for the same
0047  * interrupt and we end up with a suite of intervals. The experience
0048  * showed the interrupts are often coming following a periodic
0049  * pattern.
0050  *
0051  * The objective of the algorithm is to find out this periodic pattern
0052  * in a fastest way and use its period to predict the next irq event.
0053  *
0054  * When the next interrupt event is requested, we are in the situation
0055  * where the interrupts are disabled and the circular buffer
0056  * containing the timings is filled with the events which happened
0057  * after the previous next-interrupt-event request.
0058  *
0059  * At this point, we read the circular buffer and we fill the irq
0060  * related statistics structure. After this step, the circular array
0061  * containing the timings is empty because all the values are
0062  * dispatched in their corresponding buffers.
0063  *
0064  * Now for each interrupt, we can predict the next event by using the
0065  * suffix array, log interval and exponential moving average
0066  *
0067  * 1. Suffix array
0068  *
0069  * Suffix array is an array of all the suffixes of a string. It is
0070  * widely used as a data structure for compression, text search, ...
0071  * For instance for the word 'banana', the suffixes will be: 'banana'
0072  * 'anana' 'nana' 'ana' 'na' 'a'
0073  *
0074  * Usually, the suffix array is sorted but for our purpose it is
0075  * not necessary and won't provide any improvement in the context of
0076  * the solved problem where we clearly define the boundaries of the
0077  * search by a max period and min period.
0078  *
0079  * The suffix array will build a suite of intervals of different
0080  * length and will look for the repetition of each suite. If the suite
0081  * is repeating then we have the period because it is the length of
0082  * the suite whatever its position in the buffer.
0083  *
0084  * 2. Log interval
0085  *
0086  * We saw the irq timings allow to compute the interval of the
0087  * occurrences for a specific interrupt. We can reasonably assume the
0088  * longer is the interval, the higher is the error for the next event
0089  * and we can consider storing those interval values into an array
0090  * where each slot in the array correspond to an interval at the power
0091  * of 2 of the index. For example, index 12 will contain values
0092  * between 2^11 and 2^12.
0093  *
0094  * At the end we have an array of values where at each index defines a
0095  * [2^index - 1, 2 ^ index] interval values allowing to store a large
0096  * number of values inside a small array.
0097  *
0098  * For example, if we have the value 1123, then we store it at
0099  * ilog2(1123) = 10 index value.
0100  *
0101  * Storing those value at the specific index is done by computing an
0102  * exponential moving average for this specific slot. For instance,
0103  * for values 1800, 1123, 1453, ... fall under the same slot (10) and
0104  * the exponential moving average is computed every time a new value
0105  * is stored at this slot.
0106  *
0107  * 3. Exponential Moving Average
0108  *
0109  * The EMA is largely used to track a signal for stocks or as a low
0110  * pass filter. The magic of the formula, is it is very simple and the
0111  * reactivity of the average can be tuned with the factors called
0112  * alpha.
0113  *
0114  * The higher the alphas are, the faster the average respond to the
0115  * signal change. In our case, if a slot in the array is a big
0116  * interval, we can have numbers with a big difference between
0117  * them. The impact of those differences in the average computation
0118  * can be tuned by changing the alpha value.
0119  *
0120  *
0121  *  -- The algorithm --
0122  *
0123  * We saw the different processing above, now let's see how they are
0124  * used together.
0125  *
0126  * For each interrupt:
0127  *  For each interval:
0128  *      Compute the index = ilog2(interval)
0129  *      Compute a new_ema(buffer[index], interval)
0130  *      Store the index in a circular buffer
0131  *
0132  *  Compute the suffix array of the indexes
0133  *
0134  *  For each suffix:
0135  *      If the suffix is reverse-found 3 times
0136  *          Return suffix
0137  *
0138  *  Return Not found
0139  *
0140  * However we can not have endless suffix array to be build, it won't
0141  * make sense and it will add an extra overhead, so we can restrict
0142  * this to a maximum suffix length of 5 and a minimum suffix length of
0143  * 2. The experience showed 5 is the majority of the maximum pattern
0144  * period found for different devices.
0145  *
0146  * The result is a pattern finding less than 1us for an interrupt.
0147  *
0148  * Example based on real values:
0149  *
0150  * Example 1 : MMC write/read interrupt interval:
0151  *
0152  *  223947, 1240, 1384, 1386, 1386,
0153  *  217416, 1236, 1384, 1386, 1387,
0154  *  214719, 1241, 1386, 1387, 1384,
0155  *  213696, 1234, 1384, 1386, 1388,
0156  *  219904, 1240, 1385, 1389, 1385,
0157  *  212240, 1240, 1386, 1386, 1386,
0158  *  214415, 1236, 1384, 1386, 1387,
0159  *  214276, 1234, 1384, 1388, ?
0160  *
0161  * For each element, apply ilog2(value)
0162  *
0163  *  15, 8, 8, 8, 8,
0164  *  15, 8, 8, 8, 8,
0165  *  15, 8, 8, 8, 8,
0166  *  15, 8, 8, 8, 8,
0167  *  15, 8, 8, 8, 8,
0168  *  15, 8, 8, 8, 8,
0169  *  15, 8, 8, 8, 8,
0170  *  15, 8, 8, 8, ?
0171  *
0172  * Max period of 5, we take the last (max_period * 3) 15 elements as
0173  * we can be confident if the pattern repeats itself three times it is
0174  * a repeating pattern.
0175  *
0176  *               8,
0177  *  15, 8, 8, 8, 8,
0178  *  15, 8, 8, 8, 8,
0179  *  15, 8, 8, 8, ?
0180  *
0181  * Suffixes are:
0182  *
0183  *  1) 8, 15, 8, 8, 8  <- max period
0184  *  2) 8, 15, 8, 8
0185  *  3) 8, 15, 8
0186  *  4) 8, 15           <- min period
0187  *
0188  * From there we search the repeating pattern for each suffix.
0189  *
0190  * buffer: 8, 15, 8, 8, 8, 8, 15, 8, 8, 8, 8, 15, 8, 8, 8
0191  *         |   |  |  |  |  |   |  |  |  |  |   |  |  |  |
0192  *         8, 15, 8, 8, 8  |   |  |  |  |  |   |  |  |  |
0193  *                         8, 15, 8, 8, 8  |   |  |  |  |
0194  *                                         8, 15, 8, 8, 8
0195  *
0196  * When moving the suffix, we found exactly 3 matches.
0197  *
0198  * The first suffix with period 5 is repeating.
0199  *
0200  * The next event is (3 * max_period) % suffix_period
0201  *
0202  * In this example, the result 0, so the next event is suffix[0] => 8
0203  *
0204  * However, 8 is the index in the array of exponential moving average
0205  * which was calculated on the fly when storing the values, so the
0206  * interval is ema[8] = 1366
0207  *
0208  *
0209  * Example 2:
0210  *
0211  *  4, 3, 5, 100,
0212  *  3, 3, 5, 117,
0213  *  4, 4, 5, 112,
0214  *  4, 3, 4, 110,
0215  *  3, 5, 3, 117,
0216  *  4, 4, 5, 112,
0217  *  4, 3, 4, 110,
0218  *  3, 4, 5, 112,
0219  *  4, 3, 4, 110
0220  *
0221  * ilog2
0222  *
0223  *  0, 0, 0, 4,
0224  *  0, 0, 0, 4,
0225  *  0, 0, 0, 4,
0226  *  0, 0, 0, 4,
0227  *  0, 0, 0, 4,
0228  *  0, 0, 0, 4,
0229  *  0, 0, 0, 4,
0230  *  0, 0, 0, 4,
0231  *  0, 0, 0, 4
0232  *
0233  * Max period 5:
0234  *     0, 0, 4,
0235  *  0, 0, 0, 4,
0236  *  0, 0, 0, 4,
0237  *  0, 0, 0, 4
0238  *
0239  * Suffixes:
0240  *
0241  *  1) 0, 0, 4, 0, 0
0242  *  2) 0, 0, 4, 0
0243  *  3) 0, 0, 4
0244  *  4) 0, 0
0245  *
0246  * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4
0247  *         |  |  |  |  |  |  X
0248  *         0, 0, 4, 0, 0, |  X
0249  *                        0, 0
0250  *
0251  * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4
0252  *         |  |  |  |  |  |  |  |  |  |  |  |  |  |  |
0253  *         0, 0, 4, 0, |  |  |  |  |  |  |  |  |  |  |
0254  *                     0, 0, 4, 0, |  |  |  |  |  |  |
0255  *                                 0, 0, 4, 0, |  |  |
0256  *                                             0  0  4
0257  *
0258  * Pattern is found 3 times, the remaining is 1 which results from
0259  * (max_period * 3) % suffix_period. This value is the index in the
0260  * suffix arrays. The suffix array for a period 4 has the value 4
0261  * at index 1.
0262  */
0263 #define EMA_ALPHA_VAL       64
0264 #define EMA_ALPHA_SHIFT     7
0265
0266 #define PREDICTION_PERIOD_MIN   3
0267 #define PREDICTION_PERIOD_MAX   5
0268 #define PREDICTION_FACTOR   4
0269 #define PREDICTION_MAX      10 /* 2 ^ PREDICTION_MAX useconds */
0270 #define PREDICTION_BUFFER_SIZE  16 /* slots for EMAs, hardly more than 16 */
0271
0272 /*
0273  * Number of elements in the circular buffer: If it happens it was
0274  * flushed before, then the number of elements could be smaller than
0275  * IRQ_TIMINGS_SIZE, so the count is used, otherwise the array size is
0276  * used as we wrapped. The index begins from zero when we did not
0277  * wrap. That could be done in a nicer way with the proper circular
0278  * array structure type but with the cost of extra computation in the
0279  * interrupt handler hot path. We choose efficiency.
0280  */
0281 #define for_each_irqts(i, irqts)                    \
0282     for (i = irqts->count < IRQ_TIMINGS_SIZE ?          \
0283              0 : irqts->count & IRQ_TIMINGS_MASK,       \
0284              irqts->count = min(IRQ_TIMINGS_SIZE,       \
0285                     irqts->count);          \
0286          irqts->count > 0; irqts->count--,              \
0287              i = (i + 1) & IRQ_TIMINGS_MASK)
0288
0289 struct irqt_stat {
0290     u64 last_ts;
0291     u64 ema_time[PREDICTION_BUFFER_SIZE];
0292     int timings[IRQ_TIMINGS_SIZE];
0293     int circ_timings[IRQ_TIMINGS_SIZE];
0294     int count;
0295 };
0296
0297 /*
0298  * Exponential moving average computation
0299  */
0300 static u64 irq_timings_ema_new(u64 value, u64 ema_old)
0301 {
0302     s64 diff;
0303
0304     if (unlikely(!ema_old))
0305         return value;
0306
0307     diff = (value - ema_old) * EMA_ALPHA_VAL;
0308     /*
0309      * We can use a s64 type variable to be added with the u64
0310      * ema_old variable as this one will never have its topmost
0311      * bit set, it will be always smaller than 2^63 nanosec
0312      * interrupt interval (292 years).
0313      */
0314     return ema_old + (diff >> EMA_ALPHA_SHIFT);
0315 }
0316
0317 static int irq_timings_next_event_index(int *buffer, size_t len, int period_max)
0318 {
0319     int period;
0320
0321     /*
0322      * Move the beginning pointer to the end minus the max period x 3.
0323      * We are at the point we can begin searching the pattern
0324      */
0325     buffer = &buffer[len - (period_max * 3)];
0326
0327     /* Adjust the length to the maximum allowed period x 3 */
0328     len = period_max * 3;
0329
0330     /*
0331      * The buffer contains the suite of intervals, in a ilog2
0332      * basis, we are looking for a repetition. We point the
0333      * beginning of the search three times the length of the
0334      * period beginning at the end of the buffer. We do that for
0335      * each suffix.
0336      */
0337     for (period = period_max; period >= PREDICTION_PERIOD_MIN; period--) {
0338
0339         /*
0340          * The first comparison always succeed because the
0341          * suffix is deduced from the first n-period bytes of
0342          * the buffer and we compare the initial suffix with
0343          * itself, so we can skip the first iteration.
0344          */
0345         int idx = period;
0346         size_t size = period;
0347
0348         /*
0349          * We look if the suite with period 'i' repeat
0350          * itself. If it is truncated at the end, as it
0351          * repeats we can use the period to find out the next
0352          * element with the modulo.
0353          */
0354         while (!memcmp(buffer, &buffer[idx], size * sizeof(int))) {
0355
0356             /*
0357              * Move the index in a period basis
0358              */
0359             idx += size;
0360
0361             /*
0362              * If this condition is reached, all previous
0363              * memcmp were successful, so the period is
0364              * found.
0365              */
0366             if (idx == len)
0367                 return buffer[len % period];
0368
0369             /*
0370              * If the remaining elements to compare are
0371              * smaller than the period, readjust the size
0372              * of the comparison for the last iteration.
0373              */
0374             if (len - idx < period)
0375                 size = len - idx;
0376         }
0377     }
0378
0379     return -1;
0380 }
0381
0382 static u64 __irq_timings_next_event(struct irqt_stat *irqs, int irq, u64 now)
0383 {
0384     int index, i, period_max, count, start, min = INT_MAX;
0385
0386     if ((now - irqs->last_ts) >= NSEC_PER_SEC) {
0387         irqs->count = irqs->last_ts = 0;
0388         return U64_MAX;
0389     }
0390
0391     /*
0392      * As we want to find three times the repetition, we need a
0393      * number of intervals greater or equal to three times the
0394      * maximum period, otherwise we truncate the max period.
0395      */
0396     period_max = irqs->count > (3 * PREDICTION_PERIOD_MAX) ?
0397         PREDICTION_PERIOD_MAX : irqs->count / 3;
0398
0399     /*
0400      * If we don't have enough irq timings for this prediction,
0401      * just bail out.
0402      */
0403     if (period_max <= PREDICTION_PERIOD_MIN)
0404         return U64_MAX;
0405
0406     /*
0407      * 'count' will depends if the circular buffer wrapped or not
0408      */
0409     count = irqs->count < IRQ_TIMINGS_SIZE ?
0410         irqs->count : IRQ_TIMINGS_SIZE;
0411
0412     start = irqs->count < IRQ_TIMINGS_SIZE ?
0413         0 : (irqs->count & IRQ_TIMINGS_MASK);
0414
0415     /*
0416      * Copy the content of the circular buffer into another buffer
0417      * in order to linearize the buffer instead of dealing with
0418      * wrapping indexes and shifted array which will be prone to
0419      * error and extremely difficult to debug.
0420      */
0421     for (i = 0; i < count; i++) {
0422         int index = (start + i) & IRQ_TIMINGS_MASK;
0423
0424         irqs->timings[i] = irqs->circ_timings[index];
0425         min = min_t(int, irqs->timings[i], min);
0426     }
0427
0428     index = irq_timings_next_event_index(irqs->timings, count, period_max);
0429     if (index < 0)
0430         return irqs->last_ts + irqs->ema_time[min];
0431
0432     return irqs->last_ts + irqs->ema_time[index];
0433 }
0434
0435 static __always_inline int irq_timings_interval_index(u64 interval)
0436 {
0437     /*
0438      * The PREDICTION_FACTOR increase the interval size for the
0439      * array of exponential average.
0440      */
0441     u64 interval_us = (interval >> 10) / PREDICTION_FACTOR;
0442
0443     return likely(interval_us) ? ilog2(interval_us) : 0;
0444 }
0445
0446 static __always_inline void __irq_timings_store(int irq, struct irqt_stat *irqs,
0447                         u64 interval)
0448 {
0449     int index;
0450
0451     /*
0452      * Get the index in the ema table for this interrupt.
0453      */
0454     index = irq_timings_interval_index(interval);
0455
0456     if (index > PREDICTION_BUFFER_SIZE - 1) {
0457         irqs->count = 0;
0458         return;
0459     }
0460
0461     /*
0462      * Store the index as an element of the pattern in another
0463      * circular array.
0464      */
0465     irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index;
0466
0467     irqs->ema_time[index] = irq_timings_ema_new(interval,
0468                             irqs->ema_time[index]);
0469
0470     irqs->count++;
0471 }
0472
0473 static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts)
0474 {
0475     u64 old_ts = irqs->last_ts;
0476     u64 interval;
0477
0478     /*
0479      * The timestamps are absolute time values, we need to compute
0480      * the timing interval between two interrupts.
0481      */
0482     irqs->last_ts = ts;
0483
0484     /*
0485      * The interval type is u64 in order to deal with the same
0486      * type in our computation, that prevent mindfuck issues with
0487      * overflow, sign and division.
0488      */
0489     interval = ts - old_ts;
0490
0491     /*
0492      * The interrupt triggered more than one second apart, that
0493      * ends the sequence as predictable for our purpose. In this
0494      * case, assume we have the beginning of a sequence and the
0495      * timestamp is the first value. As it is impossible to
0496      * predict anything at this point, return.
0497      *
0498      * Note the first timestamp of the sequence will always fall
0499      * in this test because the old_ts is zero. That is what we
0500      * want as we need another timestamp to compute an interval.
0501      */
0502     if (interval >= NSEC_PER_SEC) {
0503         irqs->count = 0;
0504         return;
0505     }
0506
0507     __irq_timings_store(irq, irqs, interval);
0508 }
0509
0510 /**
0511  * irq_timings_next_event - Return when the next event is supposed to arrive
0512  *
0513  * During the last busy cycle, the number of interrupts is incremented
0514  * and stored in the irq_timings structure. This information is
0515  * necessary to:
0516  *
0517  * - know if the index in the table wrapped up:
0518  *
0519  *      If more than the array size interrupts happened during the
0520  *      last busy/idle cycle, the index wrapped up and we have to
0521  *      begin with the next element in the array which is the last one
0522  *      in the sequence, otherwise it is at the index 0.
0523  *
0524  * - have an indication of the interrupts activity on this CPU
0525  *   (eg. irq/sec)
0526  *
0527  * The values are 'consumed' after inserting in the statistical model,
0528  * thus the count is reinitialized.
0529  *
0530  * The array of values **must** be browsed in the time direction, the
0531  * timestamp must increase between an element and the next one.
0532  *
0533  * Returns a nanosec time based estimation of the earliest interrupt,
0534  * U64_MAX otherwise.
0535  */
0536 u64 irq_timings_next_event(u64 now)
0537 {
0538     struct irq_timings *irqts = this_cpu_ptr(&irq_timings);
0539     struct irqt_stat *irqs;
0540     struct irqt_stat __percpu *s;
0541     u64 ts, next_evt = U64_MAX;
0542     int i, irq = 0;
0543
0544     /*
0545      * This function must be called with the local irq disabled in
0546      * order to prevent the timings circular buffer to be updated
0547      * while we are reading it.
0548      */
0549     lockdep_assert_irqs_disabled();
0550
0551     if (!irqts->count)
0552         return next_evt;
0553
0554     /*
0555      * Number of elements in the circular buffer: If it happens it
0556      * was flushed before, then the number of elements could be
0557      * smaller than IRQ_TIMINGS_SIZE, so the count is used,
0558      * otherwise the array size is used as we wrapped. The index
0559      * begins from zero when we did not wrap. That could be done
0560      * in a nicer way with the proper circular array structure
0561      * type but with the cost of extra computation in the
0562      * interrupt handler hot path. We choose efficiency.
0563      *
0564      * Inject measured irq/timestamp to the pattern prediction
0565      * model while decrementing the counter because we consume the
0566      * data from our circular buffer.
0567      */
0568     for_each_irqts(i, irqts) {
0569         irq = irq_timing_decode(irqts->values[i], &ts);
0570         s = idr_find(&irqt_stats, irq);
0571         if (s)
0572             irq_timings_store(irq, this_cpu_ptr(s), ts);
0573     }
0574
0575     /*
0576      * Look in the list of interrupts' statistics, the earliest
0577      * next event.
0578      */
0579     idr_for_each_entry(&irqt_stats, s, i) {
0580
0581         irqs = this_cpu_ptr(s);
0582
0583         ts = __irq_timings_next_event(irqs, i, now);
0584         if (ts <= now)
0585             return now;
0586
0587         if (ts < next_evt)
0588             next_evt = ts;
0589     }
0590
0591     return next_evt;
0592 }
0593
0594 void irq_timings_free(int irq)
0595 {
0596     struct irqt_stat __percpu *s;
0597
0598     s = idr_find(&irqt_stats, irq);
0599     if (s) {
0600         free_percpu(s);
0601         idr_remove(&irqt_stats, irq);
0602     }
0603 }
0604
0605 int irq_timings_alloc(int irq)
0606 {
0607     struct irqt_stat __percpu *s;
0608     int id;
0609
0610     /*
0611      * Some platforms can have the same private interrupt per cpu,
0612      * so this function may be called several times with the
0613      * same interrupt number. Just bail out in case the per cpu
0614      * stat structure is already allocated.
0615      */
0616     s = idr_find(&irqt_stats, irq);
0617     if (s)
0618         return 0;
0619
0620     s = alloc_percpu(*s);
0621     if (!s)
0622         return -ENOMEM;
0623
0624     idr_preload(GFP_KERNEL);
0625     id = idr_alloc(&irqt_stats, s, irq, irq + 1, GFP_NOWAIT);
0626     idr_preload_end();
0627
0628     if (id < 0) {
0629         free_percpu(s);
0630         return id;
0631     }
0632
0633     return 0;
0634 }
0635
0636 #ifdef CONFIG_TEST_IRQ_TIMINGS
0637 struct timings_intervals {
0638     u64 *intervals;
0639     size_t count;
0640 };
0641
0642 /*
0643  * Intervals are given in nanosecond base
0644  */
0645 static u64 intervals0[] __initdata = {
0646     10000, 50000, 200000, 500000,
0647     10000, 50000, 200000, 500000,
0648     10000, 50000, 200000, 500000,
0649     10000, 50000, 200000, 500000,
0650     10000, 50000, 200000, 500000,
0651     10000, 50000, 200000, 500000,
0652     10000, 50000, 200000, 500000,
0653     10000, 50000, 200000, 500000,
0654     10000, 50000, 200000,
0655 };
0656
0657 static u64 intervals1[] __initdata = {
0658     223947000, 1240000, 1384000, 1386000, 1386000,
0659     217416000, 1236000, 1384000, 1386000, 1387000,
0660     214719000, 1241000, 1386000, 1387000, 1384000,
0661     213696000, 1234000, 1384000, 1386000, 1388000,
0662     219904000, 1240000, 1385000, 1389000, 1385000,
0663     212240000, 1240000, 1386000, 1386000, 1386000,
0664     214415000, 1236000, 1384000, 1386000, 1387000,
0665     214276000, 1234000,
0666 };
0667
0668 static u64 intervals2[] __initdata = {
0669     4000, 3000, 5000, 100000,
0670     3000, 3000, 5000, 117000,
0671     4000, 4000, 5000, 112000,
0672     4000, 3000, 4000, 110000,
0673     3000, 5000, 3000, 117000,
0674     4000, 4000, 5000, 112000,
0675     4000, 3000, 4000, 110000,
0676     3000, 4000, 5000, 112000,
0677     4000,
0678 };
0679
0680 static u64 intervals3[] __initdata = {
0681     1385000, 212240000, 1240000,
0682     1386000, 214415000, 1236000,
0683     1384000, 214276000, 1234000,
0684     1386000, 214415000, 1236000,
0685     1385000, 212240000, 1240000,
0686     1386000, 214415000, 1236000,
0687     1384000, 214276000, 1234000,
0688     1386000, 214415000, 1236000,
0689     1385000, 212240000, 1240000,
0690 };
0691
0692 static u64 intervals4[] __initdata = {
0693     10000, 50000, 10000, 50000,
0694     10000, 50000, 10000, 50000,
0695     10000, 50000, 10000, 50000,
0696     10000, 50000, 10000, 50000,
0697     10000, 50000, 10000, 50000,
0698     10000, 50000, 10000, 50000,
0699     10000, 50000, 10000, 50000,
0700     10000, 50000, 10000, 50000,
0701     10000,
0702 };
0703
0704 static struct timings_intervals tis[] __initdata = {
0705     { intervals0, ARRAY_SIZE(intervals0) },
0706     { intervals1, ARRAY_SIZE(intervals1) },
0707     { intervals2, ARRAY_SIZE(intervals2) },
0708     { intervals3, ARRAY_SIZE(intervals3) },
0709     { intervals4, ARRAY_SIZE(intervals4) },
0710 };
0711
0712 static int __init irq_timings_test_next_index(struct timings_intervals *ti)
0713 {
0714     int _buffer[IRQ_TIMINGS_SIZE];
0715     int buffer[IRQ_TIMINGS_SIZE];
0716     int index, start, i, count, period_max;
0717
0718     count = ti->count - 1;
0719
0720     period_max = count > (3 * PREDICTION_PERIOD_MAX) ?
0721         PREDICTION_PERIOD_MAX : count / 3;
0722
0723     /*
0724      * Inject all values except the last one which will be used
0725      * to compare with the next index result.
0726      */
0727     pr_debug("index suite: ");
0728
0729     for (i = 0; i < count; i++) {
0730         index = irq_timings_interval_index(ti->intervals[i]);
0731         _buffer[i & IRQ_TIMINGS_MASK] = index;
0732         pr_cont("%d ", index);
0733     }
0734
0735     start = count < IRQ_TIMINGS_SIZE ? 0 :
0736         count & IRQ_TIMINGS_MASK;
0737
0738     count = min_t(int, count, IRQ_TIMINGS_SIZE);
0739
0740     for (i = 0; i < count; i++) {
0741         int index = (start + i) & IRQ_TIMINGS_MASK;
0742         buffer[i] = _buffer[index];
0743     }
0744
0745     index = irq_timings_next_event_index(buffer, count, period_max);
0746     i = irq_timings_interval_index(ti->intervals[ti->count - 1]);
0747
0748     if (index != i) {
0749         pr_err("Expected (%d) and computed (%d) next indexes differ\n",
0750                i, index);
0751         return -EINVAL;
0752     }
0753
0754     return 0;
0755 }
0756
0757 static int __init irq_timings_next_index_selftest(void)
0758 {
0759     int i, ret;
0760
0761     for (i = 0; i < ARRAY_SIZE(tis); i++) {
0762
0763         pr_info("---> Injecting intervals number #%d (count=%zd)\n",
0764             i, tis[i].count);
0765
0766         ret = irq_timings_test_next_index(&tis[i]);
0767         if (ret)
0768             break;
0769     }
0770
0771     return ret;
0772 }
0773
0774 static int __init irq_timings_test_irqs(struct timings_intervals *ti)
0775 {
0776     struct irqt_stat __percpu *s;
0777     struct irqt_stat *irqs;
0778     int i, index, ret, irq = 0xACE5;
0779
0780     ret = irq_timings_alloc(irq);
0781     if (ret) {
0782         pr_err("Failed to allocate irq timings\n");
0783         return ret;
0784     }
0785
0786     s = idr_find(&irqt_stats, irq);
0787     if (!s) {
0788         ret = -EIDRM;
0789         goto out;
0790     }
0791
0792     irqs = this_cpu_ptr(s);
0793
0794     for (i = 0; i < ti->count; i++) {
0795
0796         index = irq_timings_interval_index(ti->intervals[i]);
0797         pr_debug("%d: interval=%llu ema_index=%d\n",
0798              i, ti->intervals[i], index);
0799
0800         __irq_timings_store(irq, irqs, ti->intervals[i]);
0801         if (irqs->circ_timings[i & IRQ_TIMINGS_MASK] != index) {
0802             ret = -EBADSLT;
0803             pr_err("Failed to store in the circular buffer\n");
0804             goto out;
0805         }
0806     }
0807
0808     if (irqs->count != ti->count) {
0809         ret = -ERANGE;
0810         pr_err("Count differs\n");
0811         goto out;
0812     }
0813
0814     ret = 0;
0815 out:
0816     irq_timings_free(irq);
0817
0818     return ret;
0819 }
0820
0821 static int __init irq_timings_irqs_selftest(void)
0822 {
0823     int i, ret;
0824
0825     for (i = 0; i < ARRAY_SIZE(tis); i++) {
0826         pr_info("---> Injecting intervals number #%d (count=%zd)\n",
0827             i, tis[i].count);
0828         ret = irq_timings_test_irqs(&tis[i]);
0829         if (ret)
0830             break;
0831     }
0832
0833     return ret;
0834 }
0835
0836 static int __init irq_timings_test_irqts(struct irq_timings *irqts,
0837                      unsigned count)
0838 {
0839     int start = count >= IRQ_TIMINGS_SIZE ? count - IRQ_TIMINGS_SIZE : 0;
0840     int i, irq, oirq = 0xBEEF;
0841     u64 ots = 0xDEAD, ts;
0842
0843     /*
0844      * Fill the circular buffer by using the dedicated function.
0845      */
0846     for (i = 0; i < count; i++) {
0847         pr_debug("%d: index=%d, ts=%llX irq=%X\n",
0848              i, i & IRQ_TIMINGS_MASK, ots + i, oirq + i);
0849
0850         irq_timings_push(ots + i, oirq + i);
0851     }
0852
0853     /*
0854      * Compute the first elements values after the index wrapped
0855      * up or not.
0856      */
0857     ots += start;
0858     oirq += start;
0859
0860     /*
0861      * Test the circular buffer count is correct.
0862      */
0863     pr_debug("---> Checking timings array count (%d) is right\n", count);
0864     if (WARN_ON(irqts->count != count))
0865         return -EINVAL;
0866
0867     /*
0868      * Test the macro allowing to browse all the irqts.
0869      */
0870     pr_debug("---> Checking the for_each_irqts() macro\n");
0871     for_each_irqts(i, irqts) {
0872
0873         irq = irq_timing_decode(irqts->values[i], &ts);
0874
0875         pr_debug("index=%d, ts=%llX / %llX, irq=%X / %X\n",
0876              i, ts, ots, irq, oirq);
0877
0878         if (WARN_ON(ts != ots || irq != oirq))
0879             return -EINVAL;
0880
0881         ots++; oirq++;
0882     }
0883
0884     /*
0885      * The circular buffer should have be flushed when browsed
0886      * with for_each_irqts
0887      */
0888     pr_debug("---> Checking timings array is empty after browsing it\n");
0889     if (WARN_ON(irqts->count))
0890         return -EINVAL;
0891
0892     return 0;
0893 }
0894
0895 static int __init irq_timings_irqts_selftest(void)
0896 {
0897     struct irq_timings *irqts = this_cpu_ptr(&irq_timings);
0898     int i, ret;
0899
0900     /*
0901      * Test the circular buffer with different number of
0902      * elements. The purpose is to test at the limits (empty, half
0903      * full, full, wrapped with the cursor at the boundaries,
0904      * wrapped several times, etc ...
0905      */
0906     int count[] = { 0,
0907             IRQ_TIMINGS_SIZE >> 1,
0908             IRQ_TIMINGS_SIZE,
0909             IRQ_TIMINGS_SIZE + (IRQ_TIMINGS_SIZE >> 1),
0910             2 * IRQ_TIMINGS_SIZE,
0911             (2 * IRQ_TIMINGS_SIZE) + 3,
0912     };
0913
0914     for (i = 0; i < ARRAY_SIZE(count); i++) {
0915
0916         pr_info("---> Checking the timings with %d/%d values\n",
0917             count[i], IRQ_TIMINGS_SIZE);
0918
0919         ret = irq_timings_test_irqts(irqts, count[i]);
0920         if (ret)
0921             break;
0922     }
0923
0924     return ret;
0925 }
0926
0927 static int __init irq_timings_selftest(void)
0928 {
0929     int ret;
0930
0931     pr_info("------------------- selftest start -----------------\n");
0932
0933     /*
0934      * At this point, we don't except any subsystem to use the irq
0935      * timings but us, so it should not be enabled.
0936      */
0937     if (static_branch_unlikely(&irq_timing_enabled)) {
0938         pr_warn("irq timings already initialized, skipping selftest\n");
0939         return 0;
0940     }
0941
0942     ret = irq_timings_irqts_selftest();
0943     if (ret)
0944         goto out;
0945
0946     ret = irq_timings_irqs_selftest();
0947     if (ret)
0948         goto out;
0949
0950     ret = irq_timings_next_index_selftest();
0951 out:
0952     pr_info("---------- selftest end with %s -----------\n",
0953         ret ? "failure" : "success");
0954
0955     return ret;
0956 }
0957 early_initcall(irq_timings_selftest);
0958 #endif