0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  *  Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
0004  *  Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
0005  *  Copyright(C) 2006-2007  Timesys Corp., Thomas Gleixner
0006  *
0007  *  High-resolution kernel timers
0008  *
0009  *  In contrast to the low-resolution timeout API, aka timer wheel,
0010  *  hrtimers provide finer resolution and accuracy depending on system
0011  *  configuration and capabilities.
0012  *
0013  *  Started by: Thomas Gleixner and Ingo Molnar
0014  *
0015  *  Credits:
0016  *  Based on the original timer wheel code
0017  *
0018  *  Help, testing, suggestions, bugfixes, improvements were
0019  *  provided by:
0020  *
0021  *  George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
0022  *  et al.
0023  */
0024 
0025 #include <linux/cpu.h>
0026 #include <linux/export.h>
0027 #include <linux/percpu.h>
0028 #include <linux/hrtimer.h>
0029 #include <linux/notifier.h>
0030 #include <linux/syscalls.h>
0031 #include <linux/interrupt.h>
0032 #include <linux/tick.h>
0033 #include <linux/err.h>
0034 #include <linux/debugobjects.h>
0035 #include <linux/sched/signal.h>
0036 #include <linux/sched/sysctl.h>
0037 #include <linux/sched/rt.h>
0038 #include <linux/sched/deadline.h>
0039 #include <linux/sched/nohz.h>
0040 #include <linux/sched/debug.h>
0041 #include <linux/timer.h>
0042 #include <linux/freezer.h>
0043 #include <linux/compat.h>
0044 
0045 #include <linux/uaccess.h>
0046 
0047 #include <trace/events/timer.h>
0048 
0049 #include "tick-internal.h"
0050 
0051 /*
0052  * Masks for selecting the soft and hard context timers from
0053  * cpu_base->active
0054  */
0055 #define MASK_SHIFT      (HRTIMER_BASE_MONOTONIC_SOFT)
0056 #define HRTIMER_ACTIVE_HARD ((1U << MASK_SHIFT) - 1)
0057 #define HRTIMER_ACTIVE_SOFT (HRTIMER_ACTIVE_HARD << MASK_SHIFT)
0058 #define HRTIMER_ACTIVE_ALL  (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD)
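
/*
 * Worked example, assuming the usual base layout where the four hard
 * bases precede the four soft ones and HRTIMER_BASE_MONOTONIC_SOFT == 4:
 *
 *	MASK_SHIFT          == 4
 *	HRTIMER_ACTIVE_HARD == (1U << 4) - 1  == 0x0f	(bases 0..3)
 *	HRTIMER_ACTIVE_SOFT == 0x0f << 4      == 0xf0	(bases 4..7)
 *	HRTIMER_ACTIVE_ALL  == 0xf0 | 0x0f    == 0xff
 *
 * So each bit of cpu_base->active selects exactly one clock base and the
 * masks split the bitmap into its hard and soft halves.
 */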
0059 
0060 /*
0061  * The timer bases:
0062  *
0063  * There are more clockids than hrtimer bases. Thus, we index
0064  * into the timer bases by the hrtimer_base_type enum. When trying
0065  * to reach a base using a clockid, hrtimer_clockid_to_base()
0066  * is used to convert from clockid to the proper hrtimer_base_type.
0067  */
0068 DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
0069 {
0070     .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
0071     .clock_base =
0072     {
0073         {
0074             .index = HRTIMER_BASE_MONOTONIC,
0075             .clockid = CLOCK_MONOTONIC,
0076             .get_time = &ktime_get,
0077         },
0078         {
0079             .index = HRTIMER_BASE_REALTIME,
0080             .clockid = CLOCK_REALTIME,
0081             .get_time = &ktime_get_real,
0082         },
0083         {
0084             .index = HRTIMER_BASE_BOOTTIME,
0085             .clockid = CLOCK_BOOTTIME,
0086             .get_time = &ktime_get_boottime,
0087         },
0088         {
0089             .index = HRTIMER_BASE_TAI,
0090             .clockid = CLOCK_TAI,
0091             .get_time = &ktime_get_clocktai,
0092         },
0093         {
0094             .index = HRTIMER_BASE_MONOTONIC_SOFT,
0095             .clockid = CLOCK_MONOTONIC,
0096             .get_time = &ktime_get,
0097         },
0098         {
0099             .index = HRTIMER_BASE_REALTIME_SOFT,
0100             .clockid = CLOCK_REALTIME,
0101             .get_time = &ktime_get_real,
0102         },
0103         {
0104             .index = HRTIMER_BASE_BOOTTIME_SOFT,
0105             .clockid = CLOCK_BOOTTIME,
0106             .get_time = &ktime_get_boottime,
0107         },
0108         {
0109             .index = HRTIMER_BASE_TAI_SOFT,
0110             .clockid = CLOCK_TAI,
0111             .get_time = &ktime_get_clocktai,
0112         },
0113     }
0114 };
0115 
0116 static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
0117     /* Make sure we catch unsupported clockids */
0118     [0 ... MAX_CLOCKS - 1]  = HRTIMER_MAX_CLOCK_BASES,
0119 
0120     [CLOCK_REALTIME]    = HRTIMER_BASE_REALTIME,
0121     [CLOCK_MONOTONIC]   = HRTIMER_BASE_MONOTONIC,
0122     [CLOCK_BOOTTIME]    = HRTIMER_BASE_BOOTTIME,
0123     [CLOCK_TAI]     = HRTIMER_BASE_TAI,
0124 };
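
/*
 * Example of the lookup: CLOCK_MONOTONIC indexes the table and yields
 * HRTIMER_BASE_MONOTONIC, while a clockid without an hrtimer base
 * (e.g. CLOCK_PROCESS_CPUTIME_ID) hits the catch-all initializer and
 * yields HRTIMER_MAX_CLOCK_BASES, which hrtimer_clockid_to_base()
 * below rejects with a warning and maps to HRTIMER_BASE_MONOTONIC.
 */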
0125 
0126 /*
0127  * Functions and macros which are different for UP/SMP systems are kept in a
0128  * single place
0129  */
0130 #ifdef CONFIG_SMP
0131 
0132 /*
0133  * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base()
0134  * such that hrtimer_callback_running() can unconditionally dereference
0135  * timer->base->cpu_base
0136  */
0137 static struct hrtimer_cpu_base migration_cpu_base = {
0138     .clock_base = { {
0139         .cpu_base = &migration_cpu_base,
0140         .seq      = SEQCNT_RAW_SPINLOCK_ZERO(migration_cpu_base.seq,
0141                              &migration_cpu_base.lock),
0142     }, },
0143 };
0144 
0145 #define migration_base  migration_cpu_base.clock_base[0]
0146 
0147 static inline bool is_migration_base(struct hrtimer_clock_base *base)
0148 {
0149     return base == &migration_base;
0150 }
0151 
0152 /*
0153  * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
0154  * means that all timers which are tied to this base via timer->base are
0155  * locked, and the base itself is locked too.
0156  *
0157  * So __run_timers/migrate_timers can safely modify all timers which could
0158  * be found on the lists/queues.
0159  *
0160  * When the timer's base is locked, and the timer removed from list, it is
0161  * possible to set timer->base = &migration_base and drop the lock: the timer
0162  * remains locked.
0163  */
0164 static
0165 struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
0166                          unsigned long *flags)
0167 {
0168     struct hrtimer_clock_base *base;
0169 
0170     for (;;) {
0171         base = READ_ONCE(timer->base);
0172         if (likely(base != &migration_base)) {
0173             raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
0174             if (likely(base == timer->base))
0175                 return base;
0176             /* The timer has migrated to another CPU: */
0177             raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
0178         }
0179         cpu_relax();
0180     }
0181 }
0182 
0183 /*
0184  * We do not migrate the timer when it is expiring before the next
0185  * event on the target cpu. When high resolution is enabled, we cannot
0186  * reprogram the target cpu hardware and we would cause it to fire
0187  * late. To keep it simple, we handle the high resolution enabled and
0188  *  disabled cases the same way.
0189  *
0190  * Called with cpu_base->lock of target cpu held.
0191  */
0192 static int
0193 hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
0194 {
0195     ktime_t expires;
0196 
0197     expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
0198     return expires < new_base->cpu_base->expires_next;
0199 }
0200 
0201 static inline
0202 struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
0203                      int pinned)
0204 {
0205 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
0206     if (static_branch_likely(&timers_migration_enabled) && !pinned)
0207         return &per_cpu(hrtimer_bases, get_nohz_timer_target());
0208 #endif
0209     return base;
0210 }
0211 
0212 /*
0213  * We switch the timer base to a power-optimized selected CPU target,
0214  * if:
0215  *  - NO_HZ_COMMON is enabled
0216  *  - timer migration is enabled
0217  *  - the timer callback is not running
0218  *  - the timer is not the first expiring timer on the new target
0219  *
0220  * If one of the above requirements is not fulfilled we move the timer
0221  * to the current CPU or leave it on the previously assigned CPU if
0222  * the timer callback is currently running.
0223  */
0224 static inline struct hrtimer_clock_base *
0225 switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
0226             int pinned)
0227 {
0228     struct hrtimer_cpu_base *new_cpu_base, *this_cpu_base;
0229     struct hrtimer_clock_base *new_base;
0230     int basenum = base->index;
0231 
0232     this_cpu_base = this_cpu_ptr(&hrtimer_bases);
0233     new_cpu_base = get_target_base(this_cpu_base, pinned);
0234 again:
0235     new_base = &new_cpu_base->clock_base[basenum];
0236 
0237     if (base != new_base) {
0238         /*
0239          * We are trying to move timer to new_base.
0240          * However we can't change timer's base while it is running,
0241          * so we keep it on the same CPU. No hassle vs. reprogramming
0242          * the event source in the high resolution case. The softirq
0243          * code will take care of this when the timer function has
0244          * completed. There is no conflict as we hold the lock until
0245          * the timer is enqueued.
0246          */
0247         if (unlikely(hrtimer_callback_running(timer)))
0248             return base;
0249 
0250         /* See the comment in lock_hrtimer_base() */
0251         WRITE_ONCE(timer->base, &migration_base);
0252         raw_spin_unlock(&base->cpu_base->lock);
0253         raw_spin_lock(&new_base->cpu_base->lock);
0254 
0255         if (new_cpu_base != this_cpu_base &&
0256             hrtimer_check_target(timer, new_base)) {
0257             raw_spin_unlock(&new_base->cpu_base->lock);
0258             raw_spin_lock(&base->cpu_base->lock);
0259             new_cpu_base = this_cpu_base;
0260             WRITE_ONCE(timer->base, base);
0261             goto again;
0262         }
0263         WRITE_ONCE(timer->base, new_base);
0264     } else {
0265         if (new_cpu_base != this_cpu_base &&
0266             hrtimer_check_target(timer, new_base)) {
0267             new_cpu_base = this_cpu_base;
0268             goto again;
0269         }
0270     }
0271     return new_base;
0272 }
0273 
0274 #else /* CONFIG_SMP */
0275 
0276 static inline bool is_migration_base(struct hrtimer_clock_base *base)
0277 {
0278     return false;
0279 }
0280 
0281 static inline struct hrtimer_clock_base *
0282 lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
0283 {
0284     struct hrtimer_clock_base *base = timer->base;
0285 
0286     raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
0287 
0288     return base;
0289 }
0290 
0291 # define switch_hrtimer_base(t, b, p)   (b)
0292 
0293 #endif  /* !CONFIG_SMP */
0294 
0295 /*
0296  * Functions for the union type storage format of ktime_t which are
0297  * too large for inlining:
0298  */
0299 #if BITS_PER_LONG < 64
0300 /*
0301  * Divide a ktime value by a nanosecond value
0302  */
0303 s64 __ktime_divns(const ktime_t kt, s64 div)
0304 {
0305     int sft = 0;
0306     s64 dclc;
0307     u64 tmp;
0308 
0309     dclc = ktime_to_ns(kt);
0310     tmp = dclc < 0 ? -dclc : dclc;
0311 
0312     /* Make sure the divisor is less than 2^32: */
0313     while (div >> 32) {
0314         sft++;
0315         div >>= 1;
0316     }
0317     tmp >>= sft;
0318     do_div(tmp, (u32) div);
0319     return dclc < 0 ? -tmp : tmp;
0320 }
0321 EXPORT_SYMBOL_GPL(__ktime_divns);
0322 #endif /* BITS_PER_LONG < 64 */
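
/*
 * Worked example of the shift trick in __ktime_divns(): kt = 10^12 ns,
 * div = 6 * 10^9. The divisor does not fit in 32 bits, so both div and
 * the dividend are shifted right once (sft == 1) and
 * do_div(5 * 10^11, 3 * 10^9) yields 166, which matches the exact
 * truncated quotient 10^12 / (6 * 10^9) == 166, at the cost of only the
 * precision lost by the shift.
 */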
0323 
0324 /*
0325  * Add two ktime values and do a safety check for overflow:
0326  */
0327 ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
0328 {
0329     ktime_t res = ktime_add_unsafe(lhs, rhs);
0330 
0331     /*
0332      * We use KTIME_SEC_MAX here, the maximum timeout which we can
0333      * return to user space in a timespec:
0334      */
0335     if (res < 0 || res < lhs || res < rhs)
0336         res = ktime_set(KTIME_SEC_MAX, 0);
0337 
0338     return res;
0339 }
0340 
0341 EXPORT_SYMBOL_GPL(ktime_add_safe);
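
/*
 * Example of the overflow clamp: with lhs close to the maximum, e.g.
 * lhs = KTIME_MAX - 10 and rhs = 100, the unsafe addition wraps into a
 * negative value, so the result is clamped to ktime_set(KTIME_SEC_MAX, 0)
 * instead of being returned as a bogus expiry in the past.
 */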
0342 
0343 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
0344 
0345 static const struct debug_obj_descr hrtimer_debug_descr;
0346 
0347 static void *hrtimer_debug_hint(void *addr)
0348 {
0349     return ((struct hrtimer *) addr)->function;
0350 }
0351 
0352 /*
0353  * fixup_init is called when:
0354  * - an active object is initialized
0355  */
0356 static bool hrtimer_fixup_init(void *addr, enum debug_obj_state state)
0357 {
0358     struct hrtimer *timer = addr;
0359 
0360     switch (state) {
0361     case ODEBUG_STATE_ACTIVE:
0362         hrtimer_cancel(timer);
0363         debug_object_init(timer, &hrtimer_debug_descr);
0364         return true;
0365     default:
0366         return false;
0367     }
0368 }
0369 
0370 /*
0371  * fixup_activate is called when:
0372  * - an active object is activated
0373  * - an unknown non-static object is activated
0374  */
0375 static bool hrtimer_fixup_activate(void *addr, enum debug_obj_state state)
0376 {
0377     switch (state) {
0378     case ODEBUG_STATE_ACTIVE:
0379         WARN_ON(1);
0380         fallthrough;
0381     default:
0382         return false;
0383     }
0384 }
0385 
0386 /*
0387  * fixup_free is called when:
0388  * - an active object is freed
0389  */
0390 static bool hrtimer_fixup_free(void *addr, enum debug_obj_state state)
0391 {
0392     struct hrtimer *timer = addr;
0393 
0394     switch (state) {
0395     case ODEBUG_STATE_ACTIVE:
0396         hrtimer_cancel(timer);
0397         debug_object_free(timer, &hrtimer_debug_descr);
0398         return true;
0399     default:
0400         return false;
0401     }
0402 }
0403 
0404 static const struct debug_obj_descr hrtimer_debug_descr = {
0405     .name       = "hrtimer",
0406     .debug_hint = hrtimer_debug_hint,
0407     .fixup_init = hrtimer_fixup_init,
0408     .fixup_activate = hrtimer_fixup_activate,
0409     .fixup_free = hrtimer_fixup_free,
0410 };
0411 
0412 static inline void debug_hrtimer_init(struct hrtimer *timer)
0413 {
0414     debug_object_init(timer, &hrtimer_debug_descr);
0415 }
0416 
0417 static inline void debug_hrtimer_activate(struct hrtimer *timer,
0418                       enum hrtimer_mode mode)
0419 {
0420     debug_object_activate(timer, &hrtimer_debug_descr);
0421 }
0422 
0423 static inline void debug_hrtimer_deactivate(struct hrtimer *timer)
0424 {
0425     debug_object_deactivate(timer, &hrtimer_debug_descr);
0426 }
0427 
0428 static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
0429                enum hrtimer_mode mode);
0430 
0431 void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
0432                enum hrtimer_mode mode)
0433 {
0434     debug_object_init_on_stack(timer, &hrtimer_debug_descr);
0435     __hrtimer_init(timer, clock_id, mode);
0436 }
0437 EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);
0438 
0439 static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
0440                    clockid_t clock_id, enum hrtimer_mode mode);
0441 
0442 void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
0443                    clockid_t clock_id, enum hrtimer_mode mode)
0444 {
0445     debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr);
0446     __hrtimer_init_sleeper(sl, clock_id, mode);
0447 }
0448 EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack);
0449 
0450 void destroy_hrtimer_on_stack(struct hrtimer *timer)
0451 {
0452     debug_object_free(timer, &hrtimer_debug_descr);
0453 }
0454 EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack);
0455 
0456 #else
0457 
0458 static inline void debug_hrtimer_init(struct hrtimer *timer) { }
0459 static inline void debug_hrtimer_activate(struct hrtimer *timer,
0460                       enum hrtimer_mode mode) { }
0461 static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
0462 #endif
0463 
0464 static inline void
0465 debug_init(struct hrtimer *timer, clockid_t clockid,
0466        enum hrtimer_mode mode)
0467 {
0468     debug_hrtimer_init(timer);
0469     trace_hrtimer_init(timer, clockid, mode);
0470 }
0471 
0472 static inline void debug_activate(struct hrtimer *timer,
0473                   enum hrtimer_mode mode)
0474 {
0475     debug_hrtimer_activate(timer, mode);
0476     trace_hrtimer_start(timer, mode);
0477 }
0478 
0479 static inline void debug_deactivate(struct hrtimer *timer)
0480 {
0481     debug_hrtimer_deactivate(timer);
0482     trace_hrtimer_cancel(timer);
0483 }
0484 
0485 static struct hrtimer_clock_base *
0486 __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active)
0487 {
0488     unsigned int idx;
0489 
0490     if (!*active)
0491         return NULL;
0492 
0493     idx = __ffs(*active);
0494     *active &= ~(1U << idx);
0495 
0496     return &cpu_base->clock_base[idx];
0497 }
0498 
0499 #define for_each_active_base(base, cpu_base, active)    \
0500     while ((base = __next_base((cpu_base), &(active))))
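
/*
 * Example: with active == 0x05 (HRTIMER_BASE_MONOTONIC and
 * HRTIMER_BASE_BOOTTIME pending), __next_base() returns clock_base[0]
 * first, clears bit 0, returns clock_base[2] on the next call and then
 * NULL, so for_each_active_base() visits exactly the set bases in
 * ascending index order.
 */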
0501 
0502 static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
0503                      const struct hrtimer *exclude,
0504                      unsigned int active,
0505                      ktime_t expires_next)
0506 {
0507     struct hrtimer_clock_base *base;
0508     ktime_t expires;
0509 
0510     for_each_active_base(base, cpu_base, active) {
0511         struct timerqueue_node *next;
0512         struct hrtimer *timer;
0513 
0514         next = timerqueue_getnext(&base->active);
0515         timer = container_of(next, struct hrtimer, node);
0516         if (timer == exclude) {
0517             /* Get to the next timer in the queue. */
0518             next = timerqueue_iterate_next(next);
0519             if (!next)
0520                 continue;
0521 
0522             timer = container_of(next, struct hrtimer, node);
0523         }
0524         expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
0525         if (expires < expires_next) {
0526             expires_next = expires;
0527 
0528             /* Skip cpu_base update if a timer is being excluded. */
0529             if (exclude)
0530                 continue;
0531 
0532             if (timer->is_soft)
0533                 cpu_base->softirq_next_timer = timer;
0534             else
0535                 cpu_base->next_timer = timer;
0536         }
0537     }
0538     /*
0539      * clock_was_set() might have changed base->offset of any of
0540      * the clock bases so the result might be negative. Fix it up
0541      * to prevent a false positive in clockevents_program_event().
0542      */
0543     if (expires_next < 0)
0544         expires_next = 0;
0545     return expires_next;
0546 }
0547 
0548 /*
0549  * Recomputes cpu_base::*next_timer and returns the earliest expires_next
0550  * but does not set cpu_base::*expires_next, that is done by
0551  * hrtimer[_force]_reprogram and hrtimer_interrupt only. When updating
0552  * cpu_base::*expires_next right away, reprogramming logic would no longer
0553  * work.
0554  *
0555  * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases,
0556  * those timers will get run whenever the softirq gets handled, at the end of
0557  * hrtimer_run_softirq(), hrtimer_update_softirq_timer() will re-add these bases.
0558  *
0559  * Therefore softirq values are those from the HRTIMER_ACTIVE_SOFT clock bases.
0560  * The !softirq values are the minima across HRTIMER_ACTIVE_ALL, unless an actual
0561  * softirq is pending, in which case they're the minima of HRTIMER_ACTIVE_HARD.
0562  *
0563  * @active_mask must be one of:
0564  *  - HRTIMER_ACTIVE_ALL,
0565  *  - HRTIMER_ACTIVE_SOFT, or
0566  *  - HRTIMER_ACTIVE_HARD.
0567  */
0568 static ktime_t
0569 __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask)
0570 {
0571     unsigned int active;
0572     struct hrtimer *next_timer = NULL;
0573     ktime_t expires_next = KTIME_MAX;
0574 
0575     if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) {
0576         active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
0577         cpu_base->softirq_next_timer = NULL;
0578         expires_next = __hrtimer_next_event_base(cpu_base, NULL,
0579                              active, KTIME_MAX);
0580 
0581         next_timer = cpu_base->softirq_next_timer;
0582     }
0583 
0584     if (active_mask & HRTIMER_ACTIVE_HARD) {
0585         active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
0586         cpu_base->next_timer = next_timer;
0587         expires_next = __hrtimer_next_event_base(cpu_base, NULL, active,
0588                              expires_next);
0589     }
0590 
0591     return expires_next;
0592 }
0593 
0594 static ktime_t hrtimer_update_next_event(struct hrtimer_cpu_base *cpu_base)
0595 {
0596     ktime_t expires_next, soft = KTIME_MAX;
0597 
0598     /*
0599      * If the soft interrupt has already been activated, ignore the
0600      * soft bases. They will be handled in the already raised soft
0601      * interrupt.
0602      */
0603     if (!cpu_base->softirq_activated) {
0604         soft = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);
0605         /*
0606          * Update the soft expiry time. clock_settime() might have
0607          * affected it.
0608          */
0609         cpu_base->softirq_expires_next = soft;
0610     }
0611 
0612     expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD);
0613     /*
0614      * If a softirq timer is expiring first, update cpu_base->next_timer
0615      * and program the hardware with the soft expiry time.
0616      */
0617     if (expires_next > soft) {
0618         cpu_base->next_timer = cpu_base->softirq_next_timer;
0619         expires_next = soft;
0620     }
0621 
0622     return expires_next;
0623 }
0624 
0625 static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
0626 {
0627     ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
0628     ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
0629     ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;
0630 
0631     ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq,
0632                         offs_real, offs_boot, offs_tai);
0633 
0634     base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real;
0635     base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot;
0636     base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai;
0637 
0638     return now;
0639 }
0640 
0641 /*
0642  * Is the high resolution mode active ?
0643  */
0644 static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base)
0645 {
0646     return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ?
0647         cpu_base->hres_active : 0;
0648 }
0649 
0650 static inline int hrtimer_hres_active(void)
0651 {
0652     return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases));
0653 }
0654 
0655 static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base,
0656                 struct hrtimer *next_timer,
0657                 ktime_t expires_next)
0658 {
0659     cpu_base->expires_next = expires_next;
0660 
0661     /*
0662      * If hres is not active, hardware does not have to be
0663      * reprogrammed yet.
0664      *
0665      * If a hang was detected in the last timer interrupt then we
0666      * leave the hang delay active in the hardware. We want the
0667      * system to make progress. That also prevents the following
0668      * scenario:
0669      * T1 expires 50ms from now
0670      * T2 expires 5s from now
0671      *
0672      * T1 is removed, so this code is called and would reprogram
0673      * the hardware to 5s from now. Any hrtimer_start after that
0674      * will not reprogram the hardware due to hang_detected being
0675      * set. So we'd effectively block all timers until the T2 event
0676      * fires.
0677      */
0678     if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
0679         return;
0680 
0681     tick_program_event(expires_next, 1);
0682 }
0683 
0684 /*
0685  * Reprogram the event source with checking both queues for the
0686  * next event
0687  * Called with interrupts disabled and base->lock held
0688  */
0689 static void
0690 hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
0691 {
0692     ktime_t expires_next;
0693 
0694     expires_next = hrtimer_update_next_event(cpu_base);
0695 
0696     if (skip_equal && expires_next == cpu_base->expires_next)
0697         return;
0698 
0699     __hrtimer_reprogram(cpu_base, cpu_base->next_timer, expires_next);
0700 }
0701 
0702 /* High resolution timer related functions */
0703 #ifdef CONFIG_HIGH_RES_TIMERS
0704 
0705 /*
0706  * High resolution timer enabled ?
0707  */
0708 static bool hrtimer_hres_enabled __read_mostly  = true;
0709 unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC;
0710 EXPORT_SYMBOL_GPL(hrtimer_resolution);
0711 
0712 /*
0713  * Enable / Disable high resolution mode
0714  */
0715 static int __init setup_hrtimer_hres(char *str)
0716 {
0717     return (kstrtobool(str, &hrtimer_hres_enabled) == 0);
0718 }
0719 
0720 __setup("highres=", setup_hrtimer_hres);
0721 
0722 /*
0723  * hrtimer_is_hres_enabled - query if the highres mode is enabled
0724  */
0725 static inline int hrtimer_is_hres_enabled(void)
0726 {
0727     return hrtimer_hres_enabled;
0728 }
0729 
0730 static void retrigger_next_event(void *arg);
0731 
0732 /*
0733  * Switch to high resolution mode
0734  */
0735 static void hrtimer_switch_to_hres(void)
0736 {
0737     struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);
0738 
0739     if (tick_init_highres()) {
0740         pr_warn("Could not switch to high resolution mode on CPU %u\n",
0741             base->cpu);
0742         return;
0743     }
0744     base->hres_active = 1;
0745     hrtimer_resolution = HIGH_RES_NSEC;
0746 
0747     tick_setup_sched_timer();
0748     /* "Retrigger" the interrupt to get things going */
0749     retrigger_next_event(NULL);
0750 }
0751 
0752 #else
0753 
0754 static inline int hrtimer_is_hres_enabled(void) { return 0; }
0755 static inline void hrtimer_switch_to_hres(void) { }
0756 
0757 #endif /* CONFIG_HIGH_RES_TIMERS */
0758 /*
0759  * Retrigger next event is called after clock was set with interrupts
0760  * disabled through an SMP function call or directly from low level
0761  * resume code.
0762  *
0763  * This is only invoked when:
0764  *  - CONFIG_HIGH_RES_TIMERS is enabled.
0765  *  - CONFIG_NO_HZ_COMMON is enabled.
0766  *
0767  * For the other cases this function is empty and because the call sites
0768  * are optimized out it vanishes as well, i.e. no need for lots of
0769  * #ifdeffery.
0770  */
0771 static void retrigger_next_event(void *arg)
0772 {
0773     struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);
0774 
0775     /*
0776      * When high resolution mode or nohz is active, then the offsets of
0777      * CLOCK_REALTIME/TAI/BOOTTIME have to be updated. Otherwise the
0778      * next tick will take care of that.
0779      *
0780      * If high resolution mode is active then the next expiring timer
0781      * must be reevaluated and the clock event device reprogrammed if
0782      * necessary.
0783      *
0784      * In the NOHZ case the update of the offset and the reevaluation
0785      * of the next expiring timer is enough. The return from the SMP
0786      * function call will take care of the reprogramming in case the
0787      * CPU was in a NOHZ idle sleep.
0788      */
0789     if (!__hrtimer_hres_active(base) && !tick_nohz_active)
0790         return;
0791 
0792     raw_spin_lock(&base->lock);
0793     hrtimer_update_base(base);
0794     if (__hrtimer_hres_active(base))
0795         hrtimer_force_reprogram(base, 0);
0796     else
0797         hrtimer_update_next_event(base);
0798     raw_spin_unlock(&base->lock);
0799 }
0800 
0801 /*
0802  * When a timer is enqueued and expires earlier than the already enqueued
0803  * timers, we have to check, whether it expires earlier than the timer for
0804  * which the clock event device was armed.
0805  *
0806  * Called with interrupts disabled and base->cpu_base.lock held
0807  */
0808 static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram)
0809 {
0810     struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
0811     struct hrtimer_clock_base *base = timer->base;
0812     ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
0813 
0814     WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
0815 
0816     /*
0817      * CLOCK_REALTIME timer might be requested with an absolute
0818      * expiry time which is less than base->offset. Set it to 0.
0819      */
0820     if (expires < 0)
0821         expires = 0;
0822 
0823     if (timer->is_soft) {
0824         /*
0825          * soft hrtimer could be started on a remote CPU. In this
0826          * case softirq_expires_next needs to be updated on the
0827          * remote CPU. The soft hrtimer will not expire before the
0828          * first hard hrtimer on the remote CPU -
0829          * hrtimer_check_target() prevents this case.
0830          */
0831         struct hrtimer_cpu_base *timer_cpu_base = base->cpu_base;
0832 
0833         if (timer_cpu_base->softirq_activated)
0834             return;
0835 
0836         if (!ktime_before(expires, timer_cpu_base->softirq_expires_next))
0837             return;
0838 
0839         timer_cpu_base->softirq_next_timer = timer;
0840         timer_cpu_base->softirq_expires_next = expires;
0841 
0842         if (!ktime_before(expires, timer_cpu_base->expires_next) ||
0843             !reprogram)
0844             return;
0845     }
0846 
0847     /*
0848      * If the timer is not on the current cpu, we cannot reprogram
0849      * the other cpus clock event device.
0850      */
0851     if (base->cpu_base != cpu_base)
0852         return;
0853 
0854     if (expires >= cpu_base->expires_next)
0855         return;
0856 
0857     /*
0858      * If the hrtimer interrupt is running, then it will reevaluate the
0859      * clock bases and reprogram the clock event device.
0860      */
0861     if (cpu_base->in_hrtirq)
0862         return;
0863 
0864     cpu_base->next_timer = timer;
0865 
0866     __hrtimer_reprogram(cpu_base, timer, expires);
0867 }
0868 
0869 static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base,
0870                  unsigned int active)
0871 {
0872     struct hrtimer_clock_base *base;
0873     unsigned int seq;
0874     ktime_t expires;
0875 
0876     /*
0877      * Update the base offsets unconditionally so that the following
0878      * check for whether the SMP function call is required works correctly.
0879      *
0880      * The update is safe even when the remote CPU is in the hrtimer
0881      * interrupt or the hrtimer soft interrupt and expiring affected
0882      * bases. Either it will see the update before handling a base or
0883      * it will see it when it finishes the processing and reevaluates
0884      * the next expiring timer.
0885      */
0886     seq = cpu_base->clock_was_set_seq;
0887     hrtimer_update_base(cpu_base);
0888 
0889     /*
0890      * If the sequence did not change over the update then the
0891      * remote CPU already handled it.
0892      */
0893     if (seq == cpu_base->clock_was_set_seq)
0894         return false;
0895 
0896     /*
0897      * If the remote CPU is currently handling an hrtimer interrupt, it
0898      * will reevaluate the first expiring timer of all clock bases
0899      * before reprogramming. Nothing to do here.
0900      */
0901     if (cpu_base->in_hrtirq)
0902         return false;
0903 
0904     /*
0905      * Walk the affected clock bases and check whether the first expiring
0906      * timer in a clock base is moving ahead of the first expiring timer of
0907      * @cpu_base. If so, the IPI must be invoked because per CPU clock
0908      * event devices cannot be remotely reprogrammed.
0909      */
0910     active &= cpu_base->active_bases;
0911 
0912     for_each_active_base(base, cpu_base, active) {
0913         struct timerqueue_node *next;
0914 
0915         next = timerqueue_getnext(&base->active);
0916         expires = ktime_sub(next->expires, base->offset);
0917         if (expires < cpu_base->expires_next)
0918             return true;
0919 
0920         /* Extra check for softirq clock bases */
0921         if (base->clockid < HRTIMER_BASE_MONOTONIC_SOFT)
0922             continue;
0923         if (cpu_base->softirq_activated)
0924             continue;
0925         if (expires < cpu_base->softirq_expires_next)
0926             return true;
0927     }
0928     return false;
0929 }
0930 
0931 /*
0932  * Clock was set. This might affect CLOCK_REALTIME, CLOCK_TAI and
0933  * CLOCK_BOOTTIME (for late sleep time injection).
0934  *
0935  * This requires updating the offsets for these clocks
0936  * vs. CLOCK_MONOTONIC. When high resolution timers are enabled, this
0937  * also requires eventually reprogramming the per CPU clock event devices
0938  * when the change moves an affected timer ahead of the first expiring
0939  * timer on that CPU. Obviously remote per CPU clock event devices cannot
0940  * be reprogrammed. The other reason why an IPI has to be sent is when the
0941  * system is in !HIGH_RES and NOHZ mode. The NOHZ mode updates the offsets
0942  * in the tick, which obviously might be stopped, so this has to wake up
0943  * the remote CPU, which might be sleeping in idle, to get this sorted.
0944  */
0945 void clock_was_set(unsigned int bases)
0946 {
0947     struct hrtimer_cpu_base *cpu_base = raw_cpu_ptr(&hrtimer_bases);
0948     cpumask_var_t mask;
0949     int cpu;
0950 
0951     if (!__hrtimer_hres_active(cpu_base) && !tick_nohz_active)
0952         goto out_timerfd;
0953 
0954     if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
0955         on_each_cpu(retrigger_next_event, NULL, 1);
0956         goto out_timerfd;
0957     }
0958 
0959     /* Avoid interrupting CPUs if possible */
0960     cpus_read_lock();
0961     for_each_online_cpu(cpu) {
0962         unsigned long flags;
0963 
0964         cpu_base = &per_cpu(hrtimer_bases, cpu);
0965         raw_spin_lock_irqsave(&cpu_base->lock, flags);
0966 
0967         if (update_needs_ipi(cpu_base, bases))
0968             cpumask_set_cpu(cpu, mask);
0969 
0970         raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
0971     }
0972 
0973     preempt_disable();
0974     smp_call_function_many(mask, retrigger_next_event, NULL, 1);
0975     preempt_enable();
0976     cpus_read_unlock();
0977     free_cpumask_var(mask);
0978 
0979 out_timerfd:
0980     timerfd_clock_was_set();
0981 }
0982 
0983 static void clock_was_set_work(struct work_struct *work)
0984 {
0985     clock_was_set(CLOCK_SET_WALL);
0986 }
0987 
0988 static DECLARE_WORK(hrtimer_work, clock_was_set_work);
0989 
0990 /*
0991  * Called from timekeeping code to reprogram the hrtimer interrupt device
0992  * on all cpus and to notify timerfd.
0993  */
0994 void clock_was_set_delayed(void)
0995 {
0996     schedule_work(&hrtimer_work);
0997 }
0998 
0999 /*
1000  * Called during resume either directly via timekeeping_resume()
1001  * or in the case of s2idle from tick_unfreeze() to ensure that the
1002  * hrtimers are up to date.
1003  */
1004 void hrtimers_resume_local(void)
1005 {
1006     lockdep_assert_irqs_disabled();
1007     /* Retrigger on the local CPU */
1008     retrigger_next_event(NULL);
1009 }
1010 
1011 /*
1012  * Counterpart to lock_hrtimer_base above:
1013  */
1014 static inline
1015 void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
1016 {
1017     raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
1018 }
1019 
1020 /**
1021  * hrtimer_forward - forward the timer expiry
1022  * @timer:  hrtimer to forward
1023  * @now:    forward past this time
1024  * @interval:   the interval to forward
1025  *
1026  * Forward the timer expiry so it will expire in the future.
1027  * Returns the number of overruns.
1028  *
1029  * Can be safely called from the callback function of @timer. If
1030  * called from other contexts @timer must neither be enqueued nor
1031  * running the callback and the caller needs to take care of
1032  * serialization.
1033  *
1034  * Note: This only updates the timer expiry value and does not requeue
1035  * the timer.
1036  */
1037 u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
1038 {
1039     u64 orun = 1;
1040     ktime_t delta;
1041 
1042     delta = ktime_sub(now, hrtimer_get_expires(timer));
1043 
1044     if (delta < 0)
1045         return 0;
1046 
1047     if (WARN_ON(timer->state & HRTIMER_STATE_ENQUEUED))
1048         return 0;
1049 
1050     if (interval < hrtimer_resolution)
1051         interval = hrtimer_resolution;
1052 
1053     if (unlikely(delta >= interval)) {
1054         s64 incr = ktime_to_ns(interval);
1055 
1056         orun = ktime_divns(delta, incr);
1057         hrtimer_add_expires_ns(timer, incr * orun);
1058         if (hrtimer_get_expires_tv64(timer) > now)
1059             return orun;
1060         /*
1061          * This (and the ktime_add() below) is the
1062          * correction for exact:
1063          */
1064         orun++;
1065     }
1066     hrtimer_add_expires(timer, interval);
1067 
1068     return orun;
1069 }
1070 EXPORT_SYMBOL_GPL(hrtimer_forward);
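
/*
 * Worked example: a 100ms periodic timer whose expiry is already 350ms in
 * the past when hrtimer_forward() runs. delta = 350ms >= interval, so
 * orun = 350 / 100 = 3 and the expiry is advanced by 300ms; it is still
 * 50ms behind 'now', so orun becomes 4 and one more interval is added,
 * leaving the expiry 50ms in the future and returning 4 overruns.
 */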
1071 
1072 /*
1073  * enqueue_hrtimer - internal function to (re)start a timer
1074  *
1075  * The timer is inserted in expiry order. Insertion into the
1076  * red black tree is O(log(n)). Must hold the base lock.
1077  *
1078  * Returns 1 when the new timer is the leftmost timer in the tree.
1079  */
1080 static int enqueue_hrtimer(struct hrtimer *timer,
1081                struct hrtimer_clock_base *base,
1082                enum hrtimer_mode mode)
1083 {
1084     debug_activate(timer, mode);
1085 
1086     base->cpu_base->active_bases |= 1 << base->index;
1087 
1088     /* Pairs with the lockless read in hrtimer_is_queued() */
1089     WRITE_ONCE(timer->state, HRTIMER_STATE_ENQUEUED);
1090 
1091     return timerqueue_add(&base->active, &timer->node);
1092 }
1093 
1094 /*
1095  * __remove_hrtimer - internal function to remove a timer
1096  *
1097  * Caller must hold the base lock.
1098  *
1099  * High resolution timer mode reprograms the clock event device when the
1100  * timer is the one which expires next. The caller can disable this by setting
1101  * reprogram to zero. This is useful, when the context does a reprogramming
1102  * anyway (e.g. timer interrupt)
1103  */
1104 static void __remove_hrtimer(struct hrtimer *timer,
1105                  struct hrtimer_clock_base *base,
1106                  u8 newstate, int reprogram)
1107 {
1108     struct hrtimer_cpu_base *cpu_base = base->cpu_base;
1109     u8 state = timer->state;
1110 
1111     /* Pairs with the lockless read in hrtimer_is_queued() */
1112     WRITE_ONCE(timer->state, newstate);
1113     if (!(state & HRTIMER_STATE_ENQUEUED))
1114         return;
1115 
1116     if (!timerqueue_del(&base->active, &timer->node))
1117         cpu_base->active_bases &= ~(1 << base->index);
1118 
1119     /*
1120      * Note: If reprogram is false we do not update
1121      * cpu_base->next_timer. This happens when we remove the first
1122      * timer on a remote cpu. No harm as we never dereference
1123      * cpu_base->next_timer. So the worst thing what can happen is
1124      * an superfluous call to hrtimer_force_reprogram() on the
1125      * remote cpu later on if the same timer gets enqueued again.
1126      */
1127     if (reprogram && timer == cpu_base->next_timer)
1128         hrtimer_force_reprogram(cpu_base, 1);
1129 }
1130 
1131 /*
1132  * remove hrtimer, called with base lock held
1133  */
1134 static inline int
1135 remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base,
1136            bool restart, bool keep_local)
1137 {
1138     u8 state = timer->state;
1139 
1140     if (state & HRTIMER_STATE_ENQUEUED) {
1141         bool reprogram;
1142 
1143         /*
1144          * Remove the timer and force reprogramming when high
1145          * resolution mode is active and the timer is on the current
1146          * CPU. If we remove a timer on another CPU, reprogramming is
1147          * skipped. The interrupt event on this CPU is fired and
1148          * reprogramming happens in the interrupt handler. This is a
1149          * rare case and less expensive than a smp call.
1150          */
1151         debug_deactivate(timer);
1152         reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
1153 
1154         /*
1155          * If the timer is not restarted then reprogramming is
1156          * required if the timer is local. If it is local and about
1157          * to be restarted, avoid programming it twice (on removal
1158          * and a moment later when it's requeued).
1159          */
1160         if (!restart)
1161             state = HRTIMER_STATE_INACTIVE;
1162         else
1163             reprogram &= !keep_local;
1164 
1165         __remove_hrtimer(timer, base, state, reprogram);
1166         return 1;
1167     }
1168     return 0;
1169 }
1170 
1171 static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
1172                         const enum hrtimer_mode mode)
1173 {
1174 #ifdef CONFIG_TIME_LOW_RES
1175     /*
1176      * CONFIG_TIME_LOW_RES indicates that the system has no way to return
1177      * granular time values. For relative timers we add hrtimer_resolution
1178      * (i.e. one jiffie) to prevent short timeouts.
1179      */
1180     timer->is_rel = mode & HRTIMER_MODE_REL;
1181     if (timer->is_rel)
1182         tim = ktime_add_safe(tim, hrtimer_resolution);
1183 #endif
1184     return tim;
1185 }
1186 
1187 static void
1188 hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram)
1189 {
1190     ktime_t expires;
1191 
1192     /*
1193      * Find the next SOFT expiration.
1194      */
1195     expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);
1196 
1197     /*
1198      * reprogramming needs to be triggered, even if the next soft
1199      * hrtimer expires at the same time as the next hard
1200      * hrtimer. cpu_base->softirq_expires_next needs to be updated!
1201      */
1202     if (expires == KTIME_MAX)
1203         return;
1204 
1205     /*
1206      * cpu_base->*next_timer is recomputed by __hrtimer_get_next_event()
1207      * cpu_base->*expires_next is only set by hrtimer_reprogram()
1208      */
1209     hrtimer_reprogram(cpu_base->softirq_next_timer, reprogram);
1210 }
1211 
1212 static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
1213                     u64 delta_ns, const enum hrtimer_mode mode,
1214                     struct hrtimer_clock_base *base)
1215 {
1216     struct hrtimer_clock_base *new_base;
1217     bool force_local, first;
1218 
1219     /*
1220      * If the timer is on the local cpu base and is the first expiring
1221      * timer then this might end up reprogramming the hardware twice
1222      * (on removal and on enqueue). To avoid that, skip the reprogram
1223      * on removal, keep the timer local to the current CPU and enforce
1224      * reprogramming after it is queued, no matter whether
1225      * it is the new first expiring timer again or not.
1226      */
1227     force_local = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
1228     force_local &= base->cpu_base->next_timer == timer;
1229 
1230     /*
1231      * Remove an active timer from the queue. In case it is not queued
1232      * on the current CPU, make sure that remove_hrtimer() updates the
1233      * remote data correctly.
1234      *
1235      * If it's on the current CPU and the first expiring timer, then
1236      * skip reprogramming, keep the timer local and enforce
1237      * reprogramming later if it was the first expiring timer.  This
1238      * avoids programming the underlying clock event twice (once at
1239      * removal and once after enqueue).
1240      */
1241     remove_hrtimer(timer, base, true, force_local);
1242 
1243     if (mode & HRTIMER_MODE_REL)
1244         tim = ktime_add_safe(tim, base->get_time());
1245 
1246     tim = hrtimer_update_lowres(timer, tim, mode);
1247 
1248     hrtimer_set_expires_range_ns(timer, tim, delta_ns);
1249 
1250     /* Switch the timer base, if necessary: */
1251     if (!force_local) {
1252         new_base = switch_hrtimer_base(timer, base,
1253                            mode & HRTIMER_MODE_PINNED);
1254     } else {
1255         new_base = base;
1256     }
1257 
1258     first = enqueue_hrtimer(timer, new_base, mode);
1259     if (!force_local)
1260         return first;
1261 
1262     /*
1263      * Timer was forced to stay on the current CPU to avoid
1264      * reprogramming on removal and enqueue. Force reprogram the
1265      * hardware by evaluating the new first expiring timer.
1266      */
1267     hrtimer_force_reprogram(new_base->cpu_base, 1);
1268     return 0;
1269 }
1270 
1271 /**
1272  * hrtimer_start_range_ns - (re)start an hrtimer
1273  * @timer:  the timer to be added
1274  * @tim:    expiry time
1275  * @delta_ns:   "slack" range for the timer
1276  * @mode:   timer mode: absolute (HRTIMER_MODE_ABS) or
1277  *      relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED);
1278  *      softirq based mode is considered for debug purpose only!
1279  */
1280 void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
1281                 u64 delta_ns, const enum hrtimer_mode mode)
1282 {
1283     struct hrtimer_clock_base *base;
1284     unsigned long flags;
1285 
1286     /*
1287      * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
1288      * match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard
1289      * expiry mode because unmarked timers are moved to softirq expiry.
1290      */
1291     if (!IS_ENABLED(CONFIG_PREEMPT_RT))
1292         WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft);
1293     else
1294         WARN_ON_ONCE(!(mode & HRTIMER_MODE_HARD) ^ !timer->is_hard);
1295 
1296     base = lock_hrtimer_base(timer, &flags);
1297 
1298     if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base))
1299         hrtimer_reprogram(timer, true);
1300 
1301     unlock_hrtimer_base(timer, &flags);
1302 }
1303 EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
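
/*
 * Minimal usage sketch of the start/forward API from a driver's point of
 * view (my_timer, my_callback and MY_PERIOD_NS are made-up names used
 * only for illustration):
 *
 *	static struct hrtimer my_timer;
 *
 *	static enum hrtimer_restart my_callback(struct hrtimer *t)
 *	{
 *		hrtimer_forward_now(t, ns_to_ktime(MY_PERIOD_NS));
 *		return HRTIMER_RESTART;
 *	}
 *
 *	hrtimer_init(&my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 *	my_timer.function = my_callback;
 *	hrtimer_start(&my_timer, ns_to_ktime(MY_PERIOD_NS), HRTIMER_MODE_REL);
 *
 * hrtimer_start() is the zero-slack wrapper around
 * hrtimer_start_range_ns() above.
 */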
1304 
1305 /**
1306  * hrtimer_try_to_cancel - try to deactivate a timer
1307  * @timer:  hrtimer to stop
1308  *
1309  * Returns:
1310  *
1311  *  *  0 when the timer was not active
1312  *  *  1 when the timer was active
1313  *  * -1 when the timer is currently executing the callback function and
1314  *    cannot be stopped
1315  */
1316 int hrtimer_try_to_cancel(struct hrtimer *timer)
1317 {
1318     struct hrtimer_clock_base *base;
1319     unsigned long flags;
1320     int ret = -1;
1321 
1322     /*
1323      * Check lockless first. If the timer is not active (neither
1324      * enqueued nor running the callback), nothing to do here. The
1325      * base lock does not serialize against a concurrent enqueue,
1326      * so we can avoid taking it.
1327      */
1328     if (!hrtimer_active(timer))
1329         return 0;
1330 
1331     base = lock_hrtimer_base(timer, &flags);
1332 
1333     if (!hrtimer_callback_running(timer))
1334         ret = remove_hrtimer(timer, base, false, false);
1335 
1336     unlock_hrtimer_base(timer, &flags);
1337 
1338     return ret;
1339 
1340 }
1341 EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
1342 
1343 #ifdef CONFIG_PREEMPT_RT
1344 static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base)
1345 {
1346     spin_lock_init(&base->softirq_expiry_lock);
1347 }
1348 
1349 static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base)
1350 {
1351     spin_lock(&base->softirq_expiry_lock);
1352 }
1353 
1354 static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base)
1355 {
1356     spin_unlock(&base->softirq_expiry_lock);
1357 }
1358 
1359 /*
1360  * The counterpart to hrtimer_cancel_wait_running().
1361  *
1362  * If there is a waiter for cpu_base->expiry_lock, then it was waiting for
1363  * the timer callback to finish. Drop expiry_lock and reacquire it. That
1364  * allows the waiter to acquire the lock and make progress.
1365  */
1366 static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base,
1367                       unsigned long flags)
1368 {
1369     if (atomic_read(&cpu_base->timer_waiters)) {
1370         raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1371         spin_unlock(&cpu_base->softirq_expiry_lock);
1372         spin_lock(&cpu_base->softirq_expiry_lock);
1373         raw_spin_lock_irq(&cpu_base->lock);
1374     }
1375 }
1376 
1377 /*
1378  * This function is called on PREEMPT_RT kernels when the fast path
1379  * deletion of a timer failed because the timer callback function was
1380  * running.
1381  *
1382  * This prevents priority inversion: if the soft irq thread is preempted
1383  * in the middle of a timer callback, then calling del_timer_sync() can
1384  * lead to two issues:
1385  *
1386  *  - If the caller is on a remote CPU then it has to spin wait for the timer
1387  *    handler to complete. This can result in unbound priority inversion.
1388  *
1389  *  - If the caller originates from the task which preempted the timer
1390  *    handler on the same CPU, then spin waiting for the timer handler to
1391  *    complete is never going to end.
1392  */
1393 void hrtimer_cancel_wait_running(const struct hrtimer *timer)
1394 {
1395     /* Lockless read. Prevent the compiler from reloading it below */
1396     struct hrtimer_clock_base *base = READ_ONCE(timer->base);
1397 
1398     /*
1399      * Just relax if the timer expires in hard interrupt context or if
1400      * it is currently on the migration base.
1401      */
1402     if (!timer->is_soft || is_migration_base(base)) {
1403         cpu_relax();
1404         return;
1405     }
1406 
1407     /*
1408      * Mark the base as contended and grab the expiry lock, which is
1409      * held by the softirq across the timer callback. Drop the lock
1410      * immediately so the softirq can expire the next timer. In theory
1411      * the timer could already be running again, but that's more than
1412      * unlikely and just causes another wait loop.
1413      */
1414     atomic_inc(&base->cpu_base->timer_waiters);
1415     spin_lock_bh(&base->cpu_base->softirq_expiry_lock);
1416     atomic_dec(&base->cpu_base->timer_waiters);
1417     spin_unlock_bh(&base->cpu_base->softirq_expiry_lock);
1418 }
1419 #else
1420 static inline void
1421 hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { }
1422 static inline void
1423 hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { }
1424 static inline void
1425 hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { }
1426 static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base,
1427                          unsigned long flags) { }
1428 #endif
1429 
1430 /**
1431  * hrtimer_cancel - cancel a timer and wait for the handler to finish.
1432  * @timer:  the timer to be cancelled
1433  *
1434  * Returns:
1435  *  0 when the timer was not active
1436  *  1 when the timer was active
1437  */
1438 int hrtimer_cancel(struct hrtimer *timer)
1439 {
1440     int ret;
1441 
1442     do {
1443         ret = hrtimer_try_to_cancel(timer);
1444 
1445         if (ret < 0)
1446             hrtimer_cancel_wait_running(timer);
1447     } while (ret < 0);
1448     return ret;
1449 }
1450 EXPORT_SYMBOL_GPL(hrtimer_cancel);
1451 
1452 /**
1453  * __hrtimer_get_remaining - get remaining time for the timer
1454  * @timer:  the timer to read
1455  * @adjust: adjust relative timers when CONFIG_TIME_LOW_RES=y
1456  */
1457 ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust)
1458 {
1459     unsigned long flags;
1460     ktime_t rem;
1461 
1462     lock_hrtimer_base(timer, &flags);
1463     if (IS_ENABLED(CONFIG_TIME_LOW_RES) && adjust)
1464         rem = hrtimer_expires_remaining_adjusted(timer);
1465     else
1466         rem = hrtimer_expires_remaining(timer);
1467     unlock_hrtimer_base(timer, &flags);
1468 
1469     return rem;
1470 }
1471 EXPORT_SYMBOL_GPL(__hrtimer_get_remaining);
1472 
1473 #ifdef CONFIG_NO_HZ_COMMON
1474 /**
1475  * hrtimer_get_next_event - get the time until next expiry event
1476  *
1477  * Returns the next expiry time or KTIME_MAX if no timer is pending.
1478  */
1479 u64 hrtimer_get_next_event(void)
1480 {
1481     struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
1482     u64 expires = KTIME_MAX;
1483     unsigned long flags;
1484 
1485     raw_spin_lock_irqsave(&cpu_base->lock, flags);
1486 
1487     if (!__hrtimer_hres_active(cpu_base))
1488         expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
1489 
1490     raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1491 
1492     return expires;
1493 }
1494 
1495 /**
1496  * hrtimer_next_event_without - time until next expiry event w/o one timer
1497  * @exclude:    timer to exclude
1498  *
1499  * Returns the next expiry time over all timers except for the @exclude one or
1500  * KTIME_MAX if none of them is pending.
1501  */
1502 u64 hrtimer_next_event_without(const struct hrtimer *exclude)
1503 {
1504     struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
1505     u64 expires = KTIME_MAX;
1506     unsigned long flags;
1507 
1508     raw_spin_lock_irqsave(&cpu_base->lock, flags);
1509 
1510     if (__hrtimer_hres_active(cpu_base)) {
1511         unsigned int active;
1512 
1513         if (!cpu_base->softirq_activated) {
1514             active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
1515             expires = __hrtimer_next_event_base(cpu_base, exclude,
1516                                 active, KTIME_MAX);
1517         }
1518         active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
1519         expires = __hrtimer_next_event_base(cpu_base, exclude, active,
1520                             expires);
1521     }
1522 
1523     raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1524 
1525     return expires;
1526 }
1527 #endif
1528 
1529 static inline int hrtimer_clockid_to_base(clockid_t clock_id)
1530 {
1531     if (likely(clock_id < MAX_CLOCKS)) {
1532         int base = hrtimer_clock_to_base_table[clock_id];
1533 
1534         if (likely(base != HRTIMER_MAX_CLOCK_BASES))
1535             return base;
1536     }
1537     WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id);
1538     return HRTIMER_BASE_MONOTONIC;
1539 }
1540 
1541 static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
1542                enum hrtimer_mode mode)
1543 {
1544     bool softtimer = !!(mode & HRTIMER_MODE_SOFT);
1545     struct hrtimer_cpu_base *cpu_base;
1546     int base;
1547 
1548     /*
1549      * On PREEMPT_RT enabled kernels hrtimers which are not explicitly
1550      * marked for hard interrupt expiry mode are moved into soft
1551      * interrupt context for latency reasons and because the callbacks
1552      * can invoke functions which might sleep on RT, e.g. spin_lock().
1553      */
1554     if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(mode & HRTIMER_MODE_HARD))
1555         softtimer = true;
1556 
1557     memset(timer, 0, sizeof(struct hrtimer));
1558 
1559     cpu_base = raw_cpu_ptr(&hrtimer_bases);
1560 
1561     /*
1562      * POSIX magic: Relative CLOCK_REALTIME timers are not affected by
1563      * clock modifications, so they need to become CLOCK_MONOTONIC to
1564      * ensure POSIX compliance.
1565      */
1566     if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL)
1567         clock_id = CLOCK_MONOTONIC;
1568 
1569     base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
1570     base += hrtimer_clockid_to_base(clock_id);
1571     timer->is_soft = softtimer;
1572     timer->is_hard = !!(mode & HRTIMER_MODE_HARD);
1573     timer->base = &cpu_base->clock_base[base];
1574     timerqueue_init(&timer->node);
1575 }
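
/*
 * Example of the base selection above: a CLOCK_MONOTONIC timer initialized
 * with HRTIMER_MODE_REL_SOFT ends up with softtimer == true, so base is
 * HRTIMER_MAX_CLOCK_BASES / 2 + HRTIMER_BASE_MONOTONIC, i.e. the
 * HRTIMER_BASE_MONOTONIC_SOFT entry of the per CPU clock_base array
 * defined at the top of this file.
 */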
1576 
1577 /**
1578  * hrtimer_init - initialize a timer to the given clock
1579  * @timer:  the timer to be initialized
1580  * @clock_id:   the clock to be used
1581  * @mode:       The modes which are relevant for initialization:
1582  *              HRTIMER_MODE_ABS, HRTIMER_MODE_REL, HRTIMER_MODE_ABS_SOFT,
1583  *              HRTIMER_MODE_REL_SOFT
1584  *
1585  *              The PINNED variants of the above can be handed in,
1586  *              but the PINNED bit is ignored as pinning happens
1587  *              when the hrtimer is started
1588  */
1589 void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
1590           enum hrtimer_mode mode)
1591 {
1592     debug_init(timer, clock_id, mode);
1593     __hrtimer_init(timer, clock_id, mode);
1594 }
1595 EXPORT_SYMBOL_GPL(hrtimer_init);
1596 
1597 /*
1598  * A timer is active, when it is enqueued into the rbtree or the
1599  * callback function is running or it's in the state of being migrated
1600  * to another cpu.
1601  *
1602  * It is important for this function to not return a false negative.
1603  */
1604 bool hrtimer_active(const struct hrtimer *timer)
1605 {
1606     struct hrtimer_clock_base *base;
1607     unsigned int seq;
1608 
1609     do {
1610         base = READ_ONCE(timer->base);
1611         seq = raw_read_seqcount_begin(&base->seq);
1612 
1613         if (timer->state != HRTIMER_STATE_INACTIVE ||
1614             base->running == timer)
1615             return true;
1616 
1617     } while (read_seqcount_retry(&base->seq, seq) ||
1618          base != READ_ONCE(timer->base));
1619 
1620     return false;
1621 }
1622 EXPORT_SYMBOL_GPL(hrtimer_active);
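/*
 * A small illustrative sketch (my_report_timer() is a hypothetical
 * helper, not kernel API): hrtimer_active() answers "is this timer
 * queued, running or being migrated right now?". It is a snapshot
 * only; code that must wait for a running callback should use
 * hrtimer_cancel() instead.
 */
static void my_report_timer(struct hrtimer *t)
{
	if (hrtimer_active(t))
		pr_debug("hrtimer %p: queued, running or migrating\n", t);
	else
		pr_debug("hrtimer %p: inactive\n", t);
}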
1623 
1624 /*
1625  * The write_seqcount_barrier()s in __run_hrtimer() split the thing into 3
1626  * distinct sections:
1627  *
1628  *  - queued:   the timer is queued
1629  *  - callback: the timer is being run
1630  *  - post: the timer is inactive or (re)queued
1631  *
1632  * On the read side we ensure we observe timer->state and cpu_base->running
1633  * from the same section; if anything changed while we looked at it, we retry.
1634  * This includes timer->base changing because sequence numbers alone are
1635  * insufficient for that.
1636  *
1637  * The sequence numbers are required because otherwise we could still observe
1638  * a false negative if the read side got smeared over multiple consecutive
1639  * __run_hrtimer() invocations.
1640  */
1641 
1642 static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
1643               struct hrtimer_clock_base *base,
1644               struct hrtimer *timer, ktime_t *now,
1645               unsigned long flags) __must_hold(&cpu_base->lock)
1646 {
1647     enum hrtimer_restart (*fn)(struct hrtimer *);
1648     bool expires_in_hardirq;
1649     int restart;
1650 
1651     lockdep_assert_held(&cpu_base->lock);
1652 
1653     debug_deactivate(timer);
1654     base->running = timer;
1655 
1656     /*
1657      * Separate the ->running assignment from the ->state assignment.
1658      *
1659      * As with a regular write barrier, this ensures the read side in
1660      * hrtimer_active() cannot observe base->running == NULL &&
1661      * timer->state == INACTIVE.
1662      */
1663     raw_write_seqcount_barrier(&base->seq);
1664 
1665     __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
1666     fn = timer->function;
1667 
1668     /*
1669      * Clear the 'is relative' flag for the TIME_LOW_RES case. If the
1670      * timer is restarted with a period then it becomes an absolute
1671      * timer. If it's not restarted, it does not matter.
1672      */
1673     if (IS_ENABLED(CONFIG_TIME_LOW_RES))
1674         timer->is_rel = false;
1675 
1676     /*
1677      * The timer is marked as running in the CPU base, so it is
1678      * protected against migration to a different CPU even if the lock
1679      * is dropped.
1680      */
1681     raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1682     trace_hrtimer_expire_entry(timer, now);
1683     expires_in_hardirq = lockdep_hrtimer_enter(timer);
1684 
1685     restart = fn(timer);
1686 
1687     lockdep_hrtimer_exit(expires_in_hardirq);
1688     trace_hrtimer_expire_exit(timer);
1689     raw_spin_lock_irq(&cpu_base->lock);
1690 
1691     /*
1692      * Note: We clear the running state after enqueue_hrtimer and
1693      * we do not reprogram the event hardware. Reprogramming happens
1694      * either in hrtimer_start_range_ns() or in hrtimer_interrupt().
1695      *
1696      * Note: Because we dropped the cpu_base->lock above,
1697      * hrtimer_start_range_ns() can have popped in and enqueued the timer
1698      * for us already.
1699      */
1700     if (restart != HRTIMER_NORESTART &&
1701         !(timer->state & HRTIMER_STATE_ENQUEUED))
1702         enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS);
1703 
1704     /*
1705      * Separate the ->running assignment from the ->state assignment.
1706      *
1707      * As with a regular write barrier, this ensures the read side in
1708      * hrtimer_active() cannot observe base->running.timer == NULL &&
1709      * timer->state == INACTIVE.
1710      */
1711     raw_write_seqcount_barrier(&base->seq);
1712 
1713     WARN_ON_ONCE(base->running != timer);
1714     base->running = NULL;
1715 }
1716 
1717 static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,
1718                  unsigned long flags, unsigned int active_mask)
1719 {
1720     struct hrtimer_clock_base *base;
1721     unsigned int active = cpu_base->active_bases & active_mask;
1722 
1723     for_each_active_base(base, cpu_base, active) {
1724         struct timerqueue_node *node;
1725         ktime_t basenow;
1726 
1727         basenow = ktime_add(now, base->offset);
1728 
1729         while ((node = timerqueue_getnext(&base->active))) {
1730             struct hrtimer *timer;
1731 
1732             timer = container_of(node, struct hrtimer, node);
1733 
1734             /*
1735              * The immediate goal for using the softexpires is
1736              * minimizing wakeups, not running timers at the
1737              * earliest interrupt after their soft expiration.
1738              * This allows us to avoid using a Priority Search
1739              * Tree, which can answer a stabbing query for
1740              * overlapping intervals, and instead use the simple
1741              * BST we already have.
1742              * We don't add extra wakeups by delaying timers that
1743              * are to the right of a not yet expired timer, because that
1744              * timer will have to trigger a wakeup anyway.
1745              */
1746             if (basenow < hrtimer_get_softexpires_tv64(timer))
1747                 break;
1748 
1749             __run_hrtimer(cpu_base, base, timer, &basenow, flags);
1750             if (active_mask == HRTIMER_ACTIVE_SOFT)
1751                 hrtimer_sync_wait_running(cpu_base, flags);
1752         }
1753     }
1754 }
1755 
1756 static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
1757 {
1758     struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
1759     unsigned long flags;
1760     ktime_t now;
1761 
1762     hrtimer_cpu_base_lock_expiry(cpu_base);
1763     raw_spin_lock_irqsave(&cpu_base->lock, flags);
1764 
1765     now = hrtimer_update_base(cpu_base);
1766     __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT);
1767 
1768     cpu_base->softirq_activated = 0;
1769     hrtimer_update_softirq_timer(cpu_base, true);
1770 
1771     raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1772     hrtimer_cpu_base_unlock_expiry(cpu_base);
1773 }
1774 
1775 #ifdef CONFIG_HIGH_RES_TIMERS
1776 
1777 /*
1778  * High resolution timer interrupt
1779  * Called with interrupts disabled
1780  */
1781 void hrtimer_interrupt(struct clock_event_device *dev)
1782 {
1783     struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
1784     ktime_t expires_next, now, entry_time, delta;
1785     unsigned long flags;
1786     int retries = 0;
1787 
1788     BUG_ON(!cpu_base->hres_active);
1789     cpu_base->nr_events++;
1790     dev->next_event = KTIME_MAX;
1791 
1792     raw_spin_lock_irqsave(&cpu_base->lock, flags);
1793     entry_time = now = hrtimer_update_base(cpu_base);
1794 retry:
1795     cpu_base->in_hrtirq = 1;
1796     /*
1797      * We set expires_next to KTIME_MAX here with cpu_base->lock
1798      * held to prevent a timer from being enqueued in our queue via
1799      * the migration code. This does not affect enqueueing of
1800      * timers which run their callback and need to be requeued on
1801      * this CPU.
1802      */
1803     cpu_base->expires_next = KTIME_MAX;
1804 
1805     if (!ktime_before(now, cpu_base->softirq_expires_next)) {
1806         cpu_base->softirq_expires_next = KTIME_MAX;
1807         cpu_base->softirq_activated = 1;
1808         raise_softirq_irqoff(HRTIMER_SOFTIRQ);
1809     }
1810 
1811     __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
1812 
1813     /* Reevaluate the clock bases for the [soft] next expiry */
1814     expires_next = hrtimer_update_next_event(cpu_base);
1815     /*
1816      * Store the new expiry value so the migration code can verify
1817      * against it.
1818      */
1819     cpu_base->expires_next = expires_next;
1820     cpu_base->in_hrtirq = 0;
1821     raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1822 
1823     /* Reprogramming necessary ? */
1824     if (!tick_program_event(expires_next, 0)) {
1825         cpu_base->hang_detected = 0;
1826         return;
1827     }
1828 
1829     /*
1830      * The next timer was already expired due to:
1831      * - tracing
1832      * - long lasting callbacks
1833      * - being scheduled away when running in a VM
1834      *
1835      * We need to prevent looping forever in the hrtimer
1836      * interrupt routine. We give it 3 attempts to avoid
1837      * overreacting to some spurious event.
1838      *
1839      * Acquire base lock for updating the offsets and retrieving
1840      * the current time.
1841      */
1842     raw_spin_lock_irqsave(&cpu_base->lock, flags);
1843     now = hrtimer_update_base(cpu_base);
1844     cpu_base->nr_retries++;
1845     if (++retries < 3)
1846         goto retry;
1847     /*
1848      * Give the system a chance to do something other than looping
1849      * here. We stored the entry time, so we know exactly how long
1850      * we spent here. We schedule the next event this amount of
1851      * time away.
1852      */
1853     cpu_base->nr_hangs++;
1854     cpu_base->hang_detected = 1;
1855     raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1856 
1857     delta = ktime_sub(now, entry_time);
1858     if ((unsigned int)delta > cpu_base->max_hang_time)
1859         cpu_base->max_hang_time = (unsigned int) delta;
1860     /*
1861      * Limit it to a sensible value as we enforce a longer
1862      * delay. Give the CPU at least 100ms to catch up.
1863      */
1864     if (delta > 100 * NSEC_PER_MSEC)
1865         expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
1866     else
1867         expires_next = ktime_add(now, delta);
1868     tick_program_event(expires_next, 1);
1869     pr_warn_once("hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta));
1870 }
1871 
1872 /* called with interrupts disabled */
1873 static inline void __hrtimer_peek_ahead_timers(void)
1874 {
1875     struct tick_device *td;
1876 
1877     if (!hrtimer_hres_active())
1878         return;
1879 
1880     td = this_cpu_ptr(&tick_cpu_device);
1881     if (td && td->evtdev)
1882         hrtimer_interrupt(td->evtdev);
1883 }
1884 
1885 #else /* CONFIG_HIGH_RES_TIMERS */
1886 
1887 static inline void __hrtimer_peek_ahead_timers(void) { }
1888 
1889 #endif  /* !CONFIG_HIGH_RES_TIMERS */
1890 
1891 /*
1892  * Called from run_local_timers in hardirq context every jiffy
1893  */
1894 void hrtimer_run_queues(void)
1895 {
1896     struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
1897     unsigned long flags;
1898     ktime_t now;
1899 
1900     if (__hrtimer_hres_active(cpu_base))
1901         return;
1902 
1903     /*
1904      * This _is_ ugly: We have to check periodically whether we
1905      * can switch to highres and / or nohz mode. The clocksource
1906      * switch happens with xtime_lock held. Notification from
1907      * there only sets the check bit in the tick_oneshot code,
1908      * otherwise we might deadlock vs. xtime_lock.
1909      */
1910     if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) {
1911         hrtimer_switch_to_hres();
1912         return;
1913     }
1914 
1915     raw_spin_lock_irqsave(&cpu_base->lock, flags);
1916     now = hrtimer_update_base(cpu_base);
1917 
1918     if (!ktime_before(now, cpu_base->softirq_expires_next)) {
1919         cpu_base->softirq_expires_next = KTIME_MAX;
1920         cpu_base->softirq_activated = 1;
1921         raise_softirq_irqoff(HRTIMER_SOFTIRQ);
1922     }
1923 
1924     __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
1925     raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1926 }
1927 
1928 /*
1929  * Sleep related functions:
1930  */
1931 static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
1932 {
1933     struct hrtimer_sleeper *t =
1934         container_of(timer, struct hrtimer_sleeper, timer);
1935     struct task_struct *task = t->task;
1936 
1937     t->task = NULL;
1938     if (task)
1939         wake_up_process(task);
1940 
1941     return HRTIMER_NORESTART;
1942 }
1943 
1944 /**
1945  * hrtimer_sleeper_start_expires - Start a hrtimer sleeper timer
1946  * @sl:     sleeper to be started
1947  * @mode:   timer mode abs/rel
1948  *
1949  * Wrapper around hrtimer_start_expires() for hrtimer_sleeper based timers
1950  * to allow PREEMPT_RT to tweak the delivery mode (soft/hardirq context)
1951  */
1952 void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl,
1953                    enum hrtimer_mode mode)
1954 {
1955     /*
1956      * Make the enqueue delivery mode check work on RT. If the sleeper
1957      * was initialized for hard interrupt delivery, force the mode bit.
1958      * This is a special case for hrtimer_sleepers because
1959      * hrtimer_init_sleeper() determines the delivery mode on RT, so the
1960      * call sites do not have to fiddle with this decision.
1961      */
1962     if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard)
1963         mode |= HRTIMER_MODE_HARD;
1964 
1965     hrtimer_start_expires(&sl->timer, mode);
1966 }
1967 EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires);
1968 
1969 static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
1970                    clockid_t clock_id, enum hrtimer_mode mode)
1971 {
1972     /*
1973      * On PREEMPT_RT enabled kernels hrtimers which are not explicitly
1974      * marked for hard interrupt expiry mode are moved into soft
1975      * interrupt context either for latency reasons or because the
1976      * hrtimer callback takes regular spinlocks or invokes other
1977      * functions which are not suitable for hard interrupt context on
1978      * PREEMPT_RT.
1979      *
1980      * The hrtimer_sleeper callback is RT compatible in hard interrupt
1981      * context, but there is a latency concern: Untrusted userspace can
1982      * spawn many threads which arm timers for the same expiry time on
1983      * the same CPU. That causes a latency spike due to the wakeup of
1984      * a gazillion threads.
1985      *
1986      * OTOH, privileged real-time user space applications rely on the
1987      * low latency of hard interrupt wakeups. If the current task is in
1988      * a real-time scheduling class, mark the mode for hard interrupt
1989      * expiry.
1990      */
1991     if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
1992         if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT))
1993             mode |= HRTIMER_MODE_HARD;
1994     }
1995 
1996     __hrtimer_init(&sl->timer, clock_id, mode);
1997     sl->timer.function = hrtimer_wakeup;
1998     sl->task = current;
1999 }
2000 
2001 /**
2002  * hrtimer_init_sleeper - initialize sleeper to the given clock
2003  * @sl:     sleeper to be initialized
2004  * @clock_id:   the clock to be used
2005  * @mode:   timer mode abs/rel
2006  */
2007 void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
2008               enum hrtimer_mode mode)
2009 {
2010     debug_init(&sl->timer, clock_id, mode);
2011     __hrtimer_init_sleeper(sl, clock_id, mode);
2012 
2013 }
2014 EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
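/*
 * An illustrative sketch (hypothetical my_sleep_ns(), assuming process
 * context): the canonical sleeper pattern, mirroring do_nanosleep()
 * further below - initialize the sleeper, set the expiry, start it,
 * schedule, then cancel and destroy it.
 */
static int my_sleep_ns(u64 ns)
{
	struct hrtimer_sleeper t;

	hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hrtimer_set_expires(&t.timer, ns_to_ktime(ns));

	set_current_state(TASK_INTERRUPTIBLE);
	hrtimer_sleeper_start_expires(&t, HRTIMER_MODE_REL);
	if (t.task)
		schedule();

	hrtimer_cancel(&t.timer);
	destroy_hrtimer_on_stack(&t.timer);
	__set_current_state(TASK_RUNNING);

	/* t.task is cleared by hrtimer_wakeup() when the timer fired */
	return t.task ? -EINTR : 0;
}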
2015 
2016 int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts)
2017 {
2018     switch(restart->nanosleep.type) {
2019 #ifdef CONFIG_COMPAT_32BIT_TIME
2020     case TT_COMPAT:
2021         if (put_old_timespec32(ts, restart->nanosleep.compat_rmtp))
2022             return -EFAULT;
2023         break;
2024 #endif
2025     case TT_NATIVE:
2026         if (put_timespec64(ts, restart->nanosleep.rmtp))
2027             return -EFAULT;
2028         break;
2029     default:
2030         BUG();
2031     }
2032     return -ERESTART_RESTARTBLOCK;
2033 }
2034 
2035 static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
2036 {
2037     struct restart_block *restart;
2038 
2039     do {
2040         set_current_state(TASK_INTERRUPTIBLE);
2041         hrtimer_sleeper_start_expires(t, mode);
2042 
2043         if (likely(t->task))
2044             freezable_schedule();
2045 
2046         hrtimer_cancel(&t->timer);
2047         mode = HRTIMER_MODE_ABS;
2048 
2049     } while (t->task && !signal_pending(current));
2050 
2051     __set_current_state(TASK_RUNNING);
2052 
2053     if (!t->task)
2054         return 0;
2055 
2056     restart = &current->restart_block;
2057     if (restart->nanosleep.type != TT_NONE) {
2058         ktime_t rem = hrtimer_expires_remaining(&t->timer);
2059         struct timespec64 rmt;
2060 
2061         if (rem <= 0)
2062             return 0;
2063         rmt = ktime_to_timespec64(rem);
2064 
2065         return nanosleep_copyout(restart, &rmt);
2066     }
2067     return -ERESTART_RESTARTBLOCK;
2068 }
2069 
2070 static long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
2071 {
2072     struct hrtimer_sleeper t;
2073     int ret;
2074 
2075     hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid,
2076                       HRTIMER_MODE_ABS);
2077     hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
2078     ret = do_nanosleep(&t, HRTIMER_MODE_ABS);
2079     destroy_hrtimer_on_stack(&t.timer);
2080     return ret;
2081 }
2082 
2083 long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode,
2084                const clockid_t clockid)
2085 {
2086     struct restart_block *restart;
2087     struct hrtimer_sleeper t;
2088     int ret = 0;
2089     u64 slack;
2090 
2091     slack = current->timer_slack_ns;
2092     if (dl_task(current) || rt_task(current))
2093         slack = 0;
2094 
2095     hrtimer_init_sleeper_on_stack(&t, clockid, mode);
2096     hrtimer_set_expires_range_ns(&t.timer, rqtp, slack);
2097     ret = do_nanosleep(&t, mode);
2098     if (ret != -ERESTART_RESTARTBLOCK)
2099         goto out;
2100 
2101     /* Absolute timers do not update the rmtp value and restart: */
2102     if (mode == HRTIMER_MODE_ABS) {
2103         ret = -ERESTARTNOHAND;
2104         goto out;
2105     }
2106 
2107     restart = &current->restart_block;
2108     restart->nanosleep.clockid = t.timer.base->clockid;
2109     restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
2110     set_restart_fn(restart, hrtimer_nanosleep_restart);
2111 out:
2112     destroy_hrtimer_on_stack(&t.timer);
2113     return ret;
2114 }
2115 
2116 #ifdef CONFIG_64BIT
2117 
2118 SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp,
2119         struct __kernel_timespec __user *, rmtp)
2120 {
2121     struct timespec64 tu;
2122 
2123     if (get_timespec64(&tu, rqtp))
2124         return -EFAULT;
2125 
2126     if (!timespec64_valid(&tu))
2127         return -EINVAL;
2128 
2129     current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
2130     current->restart_block.nanosleep.rmtp = rmtp;
2131     return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL,
2132                  CLOCK_MONOTONIC);
2133 }
2134 
2135 #endif
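/*
 * An illustrative sketch (userspace C, not kernel code): the syscall
 * above is what ultimately backs a libc nanosleep() call. A relative
 * sleep that resumes after signal interruption could look like this;
 * sleep_ms() is a hypothetical helper.
 */
#include <time.h>
#include <errno.h>

static int sleep_ms(long ms)
{
	struct timespec req = {
		.tv_sec  = ms / 1000,
		.tv_nsec = (ms % 1000) * 1000000L,
	};
	struct timespec rem;

	while (nanosleep(&req, &rem) == -1) {
		if (errno != EINTR)
			return -1;	/* EINVAL, EFAULT, ... */
		req = rem;		/* retry with the remaining time */
	}
	return 0;
}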
2136 
2137 #ifdef CONFIG_COMPAT_32BIT_TIME
2138 
2139 SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp,
2140                struct old_timespec32 __user *, rmtp)
2141 {
2142     struct timespec64 tu;
2143 
2144     if (get_old_timespec32(&tu, rqtp))
2145         return -EFAULT;
2146 
2147     if (!timespec64_valid(&tu))
2148         return -EINVAL;
2149 
2150     current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
2151     current->restart_block.nanosleep.compat_rmtp = rmtp;
2152     return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL,
2153                  CLOCK_MONOTONIC);
2154 }
2155 #endif
2156 
2157 /*
2158  * Functions related to boot-time initialization:
2159  */
2160 int hrtimers_prepare_cpu(unsigned int cpu)
2161 {
2162     struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
2163     int i;
2164 
2165     for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
2166         struct hrtimer_clock_base *clock_b = &cpu_base->clock_base[i];
2167 
2168         clock_b->cpu_base = cpu_base;
2169         seqcount_raw_spinlock_init(&clock_b->seq, &cpu_base->lock);
2170         timerqueue_init_head(&clock_b->active);
2171     }
2172 
2173     cpu_base->cpu = cpu;
2174     cpu_base->active_bases = 0;
2175     cpu_base->hres_active = 0;
2176     cpu_base->hang_detected = 0;
2177     cpu_base->next_timer = NULL;
2178     cpu_base->softirq_next_timer = NULL;
2179     cpu_base->expires_next = KTIME_MAX;
2180     cpu_base->softirq_expires_next = KTIME_MAX;
2181     hrtimer_cpu_base_init_expiry_lock(cpu_base);
2182     return 0;
2183 }
2184 
2185 #ifdef CONFIG_HOTPLUG_CPU
2186 
2187 static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
2188                 struct hrtimer_clock_base *new_base)
2189 {
2190     struct hrtimer *timer;
2191     struct timerqueue_node *node;
2192 
2193     while ((node = timerqueue_getnext(&old_base->active))) {
2194         timer = container_of(node, struct hrtimer, node);
2195         BUG_ON(hrtimer_callback_running(timer));
2196         debug_deactivate(timer);
2197 
2198         /*
2199          * Mark it as ENQUEUED, not INACTIVE, otherwise the
2200          * timer could be seen as !active and just vanish away
2201          * under us on another CPU.
2202          */
2203         __remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0);
2204         timer->base = new_base;
2205         /*
2206          * Enqueue the timers on the new cpu. This does not
2207          * reprogram the event device in case the timer
2208          * expires before the earliest on this CPU, but we run
2209          * hrtimer_interrupt after we migrated everything to
2210          * sort out already expired timers and reprogram the
2211          * event device.
2212          */
2213         enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS);
2214     }
2215 }
2216 
2217 int hrtimers_dead_cpu(unsigned int scpu)
2218 {
2219     struct hrtimer_cpu_base *old_base, *new_base;
2220     int i;
2221 
2222     BUG_ON(cpu_online(scpu));
2223     tick_cancel_sched_timer(scpu);
2224 
2225     /*
2226      * This BH disable ensures that raise_softirq_irqoff() does
2227      * not wake up ksoftirqd (and acquire the pi-lock) while
2228      * holding the cpu_base lock.
2229      */
2230     local_bh_disable();
2231     local_irq_disable();
2232     old_base = &per_cpu(hrtimer_bases, scpu);
2233     new_base = this_cpu_ptr(&hrtimer_bases);
2234     /*
2235      * The caller is globally serialized and nobody else
2236      * takes two locks at once, deadlock is not possible.
2237      */
2238     raw_spin_lock(&new_base->lock);
2239     raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
2240 
2241     for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
2242         migrate_hrtimer_list(&old_base->clock_base[i],
2243                      &new_base->clock_base[i]);
2244     }
2245 
2246     /*
2247      * The migration might have changed the first expiring softirq
2248      * timer on this CPU. Update it.
2249      */
2250     hrtimer_update_softirq_timer(new_base, false);
2251 
2252     raw_spin_unlock(&old_base->lock);
2253     raw_spin_unlock(&new_base->lock);
2254 
2255     /* Check if we got expired work to do */
2256     __hrtimer_peek_ahead_timers();
2257     local_irq_enable();
2258     local_bh_enable();
2259     return 0;
2260 }
2261 
2262 #endif /* CONFIG_HOTPLUG_CPU */
2263 
2264 void __init hrtimers_init(void)
2265 {
2266     hrtimers_prepare_cpu(smp_processor_id());
2267     open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq);
2268 }
2269 
2270 /**
2271  * schedule_hrtimeout_range_clock - sleep until timeout
2272  * @expires:    timeout value (ktime_t)
2273  * @delta:  slack in expires timeout (ktime_t)
2274  * @mode:   timer mode
2275  * @clock_id:   timer clock to be used
2276  */
2277 int __sched
2278 schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
2279                    const enum hrtimer_mode mode, clockid_t clock_id)
2280 {
2281     struct hrtimer_sleeper t;
2282 
2283     /*
2284      * Optimize when a zero timeout value is given. It does not
2285      * matter whether this is an absolute or a relative time.
2286      */
2287     if (expires && *expires == 0) {
2288         __set_current_state(TASK_RUNNING);
2289         return 0;
2290     }
2291 
2292     /*
2293      * A NULL parameter means "infinite"
2294      */
2295     if (!expires) {
2296         schedule();
2297         return -EINTR;
2298     }
2299 
2300     hrtimer_init_sleeper_on_stack(&t, clock_id, mode);
2301     hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
2302     hrtimer_sleeper_start_expires(&t, mode);
2303 
2304     if (likely(t.task))
2305         schedule();
2306 
2307     hrtimer_cancel(&t.timer);
2308     destroy_hrtimer_on_stack(&t.timer);
2309 
2310     __set_current_state(TASK_RUNNING);
2311 
2312     return !t.task ? 0 : -EINTR;
2313 }
2314 EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock);
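/*
 * An illustrative sketch (hypothetical wait_until_realtime()): using
 * schedule_hrtimeout_range_clock() with an absolute CLOCK_REALTIME
 * deadline. The caller sets the task state first, as with the other
 * schedule_hrtimeout*() variants.
 */
static int wait_until_realtime(ktime_t deadline)
{
	set_current_state(TASK_INTERRUPTIBLE);
	return schedule_hrtimeout_range_clock(&deadline, 0, HRTIMER_MODE_ABS,
					      CLOCK_REALTIME);
}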
2315 
2316 /**
2317  * schedule_hrtimeout_range - sleep until timeout
2318  * @expires:    timeout value (ktime_t)
2319  * @delta:  slack in expires timeout (ktime_t)
2320  * @mode:   timer mode
2321  *
2322  * Make the current task sleep until the given expiry time has
2323  * elapsed. The routine will return immediately unless
2324  * the current task state has been set (see set_current_state()).
2325  *
2326  * The @delta argument gives the kernel the freedom to schedule the
2327  * actual wakeup to a time that is both power and performance friendly.
2328  * The kernel gives the normal best effort behavior for "@expires+@delta",
2329  * but may decide to fire the timer earlier, though never earlier than @expires.
2330  *
2331  * You can set the task state as follows -
2332  *
2333  * %TASK_UNINTERRUPTIBLE - at least the @expires time is guaranteed to
2334  * pass before the routine returns unless the current task is explicitly
2335  * woken up (e.g. by wake_up_process()).
2336  *
2337  * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
2338  * delivered to the current task or the current task is explicitly woken
2339  * up.
2340  *
2341  * The current task state is guaranteed to be TASK_RUNNING when this
2342  * routine returns.
2343  *
2344  * Returns 0 when the timer has expired. If the task was woken before the
2345  * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or
2346  * by an explicit wakeup, it returns -EINTR.
2347  */
2348 int __sched schedule_hrtimeout_range(ktime_t *expires, u64 delta,
2349                      const enum hrtimer_mode mode)
2350 {
2351     return schedule_hrtimeout_range_clock(expires, delta, mode,
2352                           CLOCK_MONOTONIC);
2353 }
2354 EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
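/*
 * An illustrative sketch (hypothetical my_delay_range()): a relative
 * wait with slack, similar in spirit to usleep_range(). The slack lets
 * the kernel coalesce the wakeup anywhere in [min_ns, max_ns].
 */
static void my_delay_range(u64 min_ns, u64 max_ns)
{
	ktime_t to = ns_to_ktime(min_ns);

	set_current_state(TASK_UNINTERRUPTIBLE);
	schedule_hrtimeout_range(&to, max_ns - min_ns, HRTIMER_MODE_REL);
}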
2355 
2356 /**
2357  * schedule_hrtimeout - sleep until timeout
2358  * @expires:    timeout value (ktime_t)
2359  * @mode:   timer mode
2360  *
2361  * Make the current task sleep until the given expiry time has
2362  * elapsed. The routine will return immediately unless
2363  * the current task state has been set (see set_current_state()).
2364  *
2365  * You can set the task state as follows -
2366  *
2367  * %TASK_UNINTERRUPTIBLE - at least the @expires time is guaranteed to
2368  * pass before the routine returns unless the current task is explicitly
2369  * woken up (e.g. by wake_up_process()).
2370  *
2371  * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
2372  * delivered to the current task or the current task is explicitly woken
2373  * up.
2374  *
2375  * The current task state is guaranteed to be TASK_RUNNING when this
2376  * routine returns.
2377  *
2378  * Returns 0 when the timer has expired. If the task was woken before the
2379  * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or
2380  * by an explicit wakeup, it returns -EINTR.
2381  */
2382 int __sched schedule_hrtimeout(ktime_t *expires,
2383                    const enum hrtimer_mode mode)
2384 {
2385     return schedule_hrtimeout_range(expires, 0, mode);
2386 }
2387 EXPORT_SYMBOL_GPL(schedule_hrtimeout);