/*
 * Per-CPU tick scheduling: the NO_HZ (dynticks) implementation for low
 * and high resolution timer modes, including stopping and restarting
 * the periodic tick for idle and full dynticks CPUs.
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>
#include <linux/nmi.h>
#include <linux/profile.h>
#include <linux/sched/signal.h>
#include <linux/sched/clock.h>
#include <linux/sched/stat.h>
#include <linux/sched/nohz.h>
#include <linux/sched/loadavg.h>
#include <linux/module.h>
#include <linux/irq_work.h>
#include <linux/posix-timers.h>
#include <linux/context_tracking.h>
#include <linux/mm.h>

#include <asm/irq_regs.h>

#include "tick-internal.h"

#include <trace/events/timer.h>

/*
 * Per-CPU nohz control structure
 */
static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);

struct tick_sched *tick_get_tick_sched(int cpu)
{
	return &per_cpu(tick_cpu_sched, cpu);
}

#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)

/*
 * The time when the last jiffy update happened. Write access must hold
 * jiffies_lock and jiffies_seq. tick_nohz_next_event() needs to get a
 * consistent view of jiffies and last_jiffies_update.
 */
static ktime_t last_jiffies_update;

/*
 * Must be called with interrupts disabled !
 */
static void tick_do_update_jiffies64(ktime_t now)
{
	unsigned long ticks = 1;
	ktime_t delta, nextp;

	/*
	 * 64-bit can do a quick check without holding the jiffies lock and
	 * without looking at the sequence count. The smp_load_acquire()
	 * pairs with the update done later in this function.
	 *
	 * 32-bit cannot do that because the store of 'tick_next_period'
	 * consists of two 32-bit stores, and the first store could move it
	 * to a random point in the future.
	 */
	if (IS_ENABLED(CONFIG_64BIT)) {
		if (ktime_before(now, smp_load_acquire(&tick_next_period)))
			return;
	} else {
		unsigned int seq;

		/*
		 * Avoid contention on 'jiffies_lock' and protect the quick
		 * check with the sequence count.
		 */
		do {
			seq = read_seqcount_begin(&jiffies_seq);
			nextp = tick_next_period;
		} while (read_seqcount_retry(&jiffies_seq, seq));

		if (ktime_before(now, nextp))
			return;
	}

	/* Quick check failed, i.e. an update is required. */
	raw_spin_lock(&jiffies_lock);
	/*
	 * Re-evaluate with the lock held. Another CPU might have done the
	 * update already.
	 */
	if (ktime_before(now, tick_next_period)) {
		raw_spin_unlock(&jiffies_lock);
		return;
	}

	write_seqcount_begin(&jiffies_seq);

	delta = ktime_sub(now, tick_next_period);
	if (unlikely(delta >= TICK_NSEC)) {
		/* Slow path for long idle sleep times */
		s64 incr = TICK_NSEC;

		ticks += ktime_divns(delta, incr);

		last_jiffies_update = ktime_add_ns(last_jiffies_update,
						   incr * ticks);
	} else {
		last_jiffies_update = ktime_add_ns(last_jiffies_update,
						   TICK_NSEC);
	}

	/* Advance jiffies to complete the 'jiffies_seq' protected job */
	jiffies_64 += ticks;

	/* Keep the tick_next_period variable up to date */
	nextp = ktime_add_ns(last_jiffies_update, TICK_NSEC);

	if (IS_ENABLED(CONFIG_64BIT)) {
		/*
		 * Pairs with smp_load_acquire() in the lockless quick check
		 * above: the release store makes the jiffies update visible
		 * before the new 'tick_next_period' value.
		 */
		smp_store_release(&tick_next_period, nextp);
	} else {
		/*
		 * A plain store is good enough on 32-bit, as the quick check
		 * above is protected by the sequence count.
		 */
		tick_next_period = nextp;
	}

	/*
	 * Release the sequence count. calc_global_load() below is not
	 * protected by it, but 'jiffies_lock' needs to be held to prevent
	 * concurrent invocations.
	 */
	write_seqcount_end(&jiffies_seq);

	calc_global_load();

	raw_spin_unlock(&jiffies_lock);
	update_wall_time();
}

/*
 * Initialize and retrieve the jiffies update.
 */
static ktime_t tick_init_jiffy_update(void)
{
	ktime_t period;

	raw_spin_lock(&jiffies_lock);
	write_seqcount_begin(&jiffies_seq);
	/* Have we started the jiffies update yet ? */
	if (last_jiffies_update == 0)
		last_jiffies_update = tick_next_period;
	period = last_jiffies_update;
	write_seqcount_end(&jiffies_seq);
	raw_spin_unlock(&jiffies_lock);
	return period;
}

#define MAX_STALLED_JIFFIES 5

static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
{
	int cpu = smp_processor_id();

#ifdef CONFIG_NO_HZ_COMMON
	/*
	 * Check if the do_timer duty was dropped. We don't care about
	 * concurrency: this happens only when the CPU in charge went
	 * into a long sleep. If two CPUs happen to assign themselves to
	 * this duty, then the jiffies update is still serialized by
	 * jiffies_lock.
	 *
	 * If nohz_full is enabled, this should not happen because the
	 * 'tick_do_timer_cpu' CPU never relinquishes.
	 */
	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
#ifdef CONFIG_NO_HZ_FULL
		WARN_ON_ONCE(tick_nohz_full_running);
#endif
		tick_do_timer_cpu = cpu;
	}
#endif

	/* Check if jiffies need an update */
	if (tick_do_timer_cpu == cpu)
		tick_do_update_jiffies64(now);

	/*
	 * If the jiffies update stalled for too long (timekeeper in
	 * stop_machine() or VMEXIT'ed for several msecs), force an update.
	 */
	if (ts->last_tick_jiffies != jiffies) {
		ts->stalled_jiffies = 0;
		ts->last_tick_jiffies = READ_ONCE(jiffies);
	} else {
		if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) {
			tick_do_update_jiffies64(now);
			ts->stalled_jiffies = 0;
			ts->last_tick_jiffies = READ_ONCE(jiffies);
		}
	}

	if (ts->inidle)
		ts->got_idle_tick = 1;
}

static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
{
#ifdef CONFIG_NO_HZ_COMMON
	/*
	 * When we are idle and the tick is stopped, we have to touch
	 * the watchdog as we might not schedule for a really long
	 * time. This happens on completely idle SMP systems while
	 * waiting on the login prompt. We also increment the "start of
	 * idle" jiffy stamp so the idle accounting adjustment we do
	 * when we go busy again does not account too many ticks.
	 */
	if (ts->tick_stopped) {
		touch_softlockup_watchdog_sched();
		if (is_idle_task(current))
			ts->idle_jiffies++;
		/*
		 * In case the current tick fired too early past its expected
		 * expiration, make sure we don't bypass the next clock
		 * reprogramming to the same deadline.
		 */
		ts->next_tick = 0;
	}
#endif
	update_process_times(user_mode(regs));
	profile_tick(CPU_PROFILING);
}
#endif /* CONFIG_NO_HZ_COMMON || CONFIG_HIGH_RES_TIMERS */

#ifdef CONFIG_NO_HZ_FULL
cpumask_var_t tick_nohz_full_mask;
EXPORT_SYMBOL_GPL(tick_nohz_full_mask);
bool tick_nohz_full_running;
EXPORT_SYMBOL_GPL(tick_nohz_full_running);
static atomic_t tick_dep_mask;

static bool check_tick_dependency(atomic_t *dep)
{
	int val = atomic_read(dep);

	if (val & TICK_DEP_MASK_POSIX_TIMER) {
		trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER);
		return true;
	}

	if (val & TICK_DEP_MASK_PERF_EVENTS) {
		trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS);
		return true;
	}

	if (val & TICK_DEP_MASK_SCHED) {
		trace_tick_stop(0, TICK_DEP_MASK_SCHED);
		return true;
	}

	if (val & TICK_DEP_MASK_CLOCK_UNSTABLE) {
		trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
		return true;
	}

	if (val & TICK_DEP_MASK_RCU) {
		trace_tick_stop(0, TICK_DEP_MASK_RCU);
		return true;
	}

	return false;
}

static bool can_stop_full_tick(int cpu, struct tick_sched *ts)
{
	lockdep_assert_irqs_disabled();

	if (unlikely(!cpu_online(cpu)))
		return false;

	if (check_tick_dependency(&tick_dep_mask))
		return false;

	if (check_tick_dependency(&ts->tick_dep_mask))
		return false;

	if (check_tick_dependency(&current->tick_dep_mask))
		return false;

	if (check_tick_dependency(&current->signal->tick_dep_mask))
		return false;

	return true;
}

static void nohz_full_kick_func(struct irq_work *work)
{
	/* Empty, the tick restart happens on tick_nohz_irq_exit() */
}

static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) =
	IRQ_WORK_INIT_HARD(nohz_full_kick_func);

/*
 * Kick this CPU if it's full dynticks in order to force it to
 * re-evaluate its dependency on the tick and restart it if necessary.
 * This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
 * is NMI safe.
 */
static void tick_nohz_full_kick(void)
{
	if (!tick_nohz_full_cpu(smp_processor_id()))
		return;

	irq_work_queue(this_cpu_ptr(&nohz_full_kick_work));
}

/*
 * Kick the CPU if it's full dynticks in order to force it to
 * re-evaluate its dependency on the tick and restart it if necessary.
 */
void tick_nohz_full_kick_cpu(int cpu)
{
	if (!tick_nohz_full_cpu(cpu))
		return;

	irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
}

static void tick_nohz_kick_task(struct task_struct *tsk)
{
	int cpu;

	/*
	 * If the task is not running, run_posix_cpu_timers() has nothing
	 * to elapse, so the IPI can be spared. The full barrier implied by
	 * atomic_fetch_or() in the callers orders the dependency-mask update
	 * against the task being queued, so a freshly activated task
	 * observes the new mask when it is switched in.
	 */
	if (!sched_task_on_rq(tsk))
		return;

	/*
	 * If the task concurrently migrates to another CPU, that CPU will
	 * observe the updated dependency mask when it switches the task in,
	 * so kicking the CPU recorded here is sufficient.
	 */
	cpu = task_cpu(tsk);

	preempt_disable();
	if (cpu_online(cpu))
		tick_nohz_full_kick_cpu(cpu);
	preempt_enable();
}

/*
 * Kick all full dynticks CPUs in order to force these to re-evaluate
 * their dependency on the tick and restart it if necessary.
 */
static void tick_nohz_full_kick_all(void)
{
	int cpu;

	if (!tick_nohz_full_running)
		return;

	preempt_disable();
	for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask)
		tick_nohz_full_kick_cpu(cpu);
	preempt_enable();
}

static void tick_nohz_dep_set_all(atomic_t *dep,
				  enum tick_dep_bits bit)
{
	int prev;

	prev = atomic_fetch_or(BIT(bit), dep);
	if (!prev)
		tick_nohz_full_kick_all();
}

/*
 * Set a global tick dependency. Used by perf events that rely on freq and
 * unstable clocks.
 */
void tick_nohz_dep_set(enum tick_dep_bits bit)
{
	tick_nohz_dep_set_all(&tick_dep_mask, bit);
}

void tick_nohz_dep_clear(enum tick_dep_bits bit)
{
	atomic_andnot(BIT(bit), &tick_dep_mask);
}

/*
 * Set per-CPU tick dependency. Used by scheduler and perf events in order to
 * manage event throttling.
 */
void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
{
	int prev;
	struct tick_sched *ts;

	ts = per_cpu_ptr(&tick_cpu_sched, cpu);

	prev = atomic_fetch_or(BIT(bit), &ts->tick_dep_mask);
	if (!prev) {
		preempt_disable();
		/* Perf needs a local kick that is NMI safe */
		if (cpu == smp_processor_id()) {
			tick_nohz_full_kick();
		} else {
			/* Remote IRQ work is not NMI safe */
			if (!WARN_ON_ONCE(in_nmi()))
				tick_nohz_full_kick_cpu(cpu);
		}
		preempt_enable();
	}
}
EXPORT_SYMBOL_GPL(tick_nohz_dep_set_cpu);
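
/*
 * Illustrative usage sketch (not part of this file): a subsystem that needs
 * the periodic tick to keep running on a particular CPU while it has work
 * pending brackets that work with a set/clear pair, picking the
 * tick_dep_bits value that matches its subsystem, e.g.:
 *
 *	tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_PERF_EVENTS);
 *	... per-CPU work that relies on the tick ...
 *	tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_PERF_EVENTS);
 *
 * The first setter of a bit kicks the target CPU so it re-evaluates whether
 * the tick can stay stopped; clearing the bit merely removes the dependency.
 */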

void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
{
	struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);

	atomic_andnot(BIT(bit), &ts->tick_dep_mask);
}
EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu);

/*
 * Set a per-task tick dependency. RCU needs this. Posix CPU timers also
 * need it in order to elapse per-task timers.
 */
void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
{
	if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask))
		tick_nohz_kick_task(tsk);
}
EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task);

void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
{
	atomic_andnot(BIT(bit), &tsk->tick_dep_mask);
}
EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_task);

/*
 * Set a per-taskgroup tick dependency. Posix CPU timers need this in order
 * to elapse per-process timers.
 */
void tick_nohz_dep_set_signal(struct task_struct *tsk,
			      enum tick_dep_bits bit)
{
	int prev;
	struct signal_struct *sig = tsk->signal;

	prev = atomic_fetch_or(BIT(bit), &sig->tick_dep_mask);
	if (!prev) {
		struct task_struct *t;

		lockdep_assert_held(&tsk->sighand->siglock);
		__for_each_thread(sig, t)
			tick_nohz_kick_task(t);
	}
}

void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
{
	atomic_andnot(BIT(bit), &sig->tick_dep_mask);
}

/*
 * Re-evaluate the need for the tick as we switch the current task.
 * It might need the tick due to per-task/process properties:
 * perf events, posix CPU timers, ...
 */
void __tick_nohz_task_switch(void)
{
	struct tick_sched *ts;

	if (!tick_nohz_full_cpu(smp_processor_id()))
		return;

	ts = this_cpu_ptr(&tick_cpu_sched);

	if (ts->tick_stopped) {
		if (atomic_read(&current->tick_dep_mask) ||
		    atomic_read(&current->signal->tick_dep_mask))
			tick_nohz_full_kick();
	}
}

/* Get the boot-time nohz CPU list from the kernel parameters. */
void __init tick_nohz_full_setup(cpumask_var_t cpumask)
{
	alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
	cpumask_copy(tick_nohz_full_mask, cpumask);
	tick_nohz_full_running = true;
}
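
/*
 * For reference: the cpumask handed to tick_nohz_full_setup() comes from the
 * "nohz_full=" boot parameter, e.g. booting with "nohz_full=2-7" marks CPUs
 * 2-7 as full dynticks candidates (the parameter is parsed outside this file).
 */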

static int tick_nohz_cpu_down(unsigned int cpu)
{
	/*
	 * The tick_do_timer_cpu CPU handles housekeeping duty (unbound
	 * timers, workqueues, timekeeping, ...) on behalf of full
	 * dynticks CPUs. It must remain online when nohz full is enabled.
	 */
	if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
		return -EBUSY;
	return 0;
}

void __init tick_nohz_init(void)
{
	int cpu, ret;

	if (!tick_nohz_full_running)
		return;

	/*
	 * Full dynticks uses IRQ work to drive the tick rescheduling on safe
	 * locking contexts. But then we need IRQ work to raise its own
	 * interrupts to avoid a circular dependency on the tick.
	 */
	if (!arch_irq_work_has_interrupt()) {
		pr_warn("NO_HZ: Can't run full dynticks because arch doesn't support irq work self-IPIs\n");
		cpumask_clear(tick_nohz_full_mask);
		tick_nohz_full_running = false;
		return;
	}

	if (IS_ENABLED(CONFIG_PM_SLEEP_SMP) &&
	    !IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) {
		cpu = smp_processor_id();

		if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
			pr_warn("NO_HZ: Clearing %d from nohz_full range "
				"for timekeeping\n", cpu);
			cpumask_clear_cpu(cpu, tick_nohz_full_mask);
		}
	}

	for_each_cpu(cpu, tick_nohz_full_mask)
		ct_cpu_track_user(cpu);

	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					"kernel/nohz:predown", NULL,
					tick_nohz_cpu_down);
	WARN_ON(ret < 0);
	pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n",
		cpumask_pr_args(tick_nohz_full_mask));
}
#endif /* CONFIG_NO_HZ_FULL */

/*
 * NOHZ - aka dynticks - flag setting
 */
#ifdef CONFIG_NO_HZ_COMMON
/*
 * NO HZ enabled ?
 */
bool tick_nohz_enabled __read_mostly = true;
unsigned long tick_nohz_active __read_mostly;

/*
 * Enable / Disable tickless mode
 */
static int __init setup_tick_nohz(char *str)
{
	return (kstrtobool(str, &tick_nohz_enabled) == 0);
}

__setup("nohz=", setup_tick_nohz);
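
/*
 * Example (command line, not code): booting with "nohz=off" keeps the
 * periodic tick running on every CPU, while the default "nohz=on" allows
 * the tick to be stopped when a CPU goes idle.
 */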

bool tick_nohz_tick_stopped(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

	return ts->tick_stopped;
}

bool tick_nohz_tick_stopped_cpu(int cpu)
{
	struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);

	return ts->tick_stopped;
}

/*
 * tick_nohz_update_jiffies - update jiffies when idle was interrupted
 *
 * Called from interrupt entry when the CPU was idle. In case the sched tick
 * was stopped on this CPU, jiffies may be stale and must be updated before
 * an interrupt handler can rely on them.
 */
static void tick_nohz_update_jiffies(ktime_t now)
{
	unsigned long flags;

	__this_cpu_write(tick_cpu_sched.idle_waketime, now);

	local_irq_save(flags);
	tick_do_update_jiffies64(now);
	local_irq_restore(flags);

	touch_softlockup_watchdog_sched();
}

/*
 * Updates the per-CPU idle time statistics counters
 */
static void
update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_update_time)
{
	ktime_t delta;

	if (ts->idle_active) {
		delta = ktime_sub(now, ts->idle_entrytime);
		if (nr_iowait_cpu(cpu) > 0)
			ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
		else
			ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
		ts->idle_entrytime = now;
	}

	if (last_update_time)
		*last_update_time = ktime_to_us(now);
}

static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
{
	update_ts_time_stats(smp_processor_id(), ts, now, NULL);
	ts->idle_active = 0;

	sched_clock_idle_wakeup_event();
}

static void tick_nohz_start_idle(struct tick_sched *ts)
{
	ts->idle_entrytime = ktime_get();
	ts->idle_active = 1;
	sched_clock_idle_sleep_event();
}

/**
 * get_cpu_idle_time_us - get the total idle time of a CPU
 * @cpu: CPU number to query
 * @last_update_time: variable to store update time in. Do not update
 * counters if NULL.
 *
 * Return the cumulative idle time (since boot) for a given CPU, in
 * microseconds.
 *
 * This time is measured via accounting rather than sampling, and is as
 * accurate as ktime_get() is.
 *
 * This function returns -1 if NOHZ is not enabled.
 */
u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
{
	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
	ktime_t now, idle;

	if (!tick_nohz_active)
		return -1;

	now = ktime_get();
	if (last_update_time) {
		update_ts_time_stats(cpu, ts, now, last_update_time);
		idle = ts->idle_sleeptime;
	} else {
		if (ts->idle_active && !nr_iowait_cpu(cpu)) {
			ktime_t delta = ktime_sub(now, ts->idle_entrytime);

			idle = ktime_add(ts->idle_sleeptime, delta);
		} else {
			idle = ts->idle_sleeptime;
		}
	}

	return ktime_to_us(idle);
}
EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
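
/*
 * Illustrative usage (not part of this file): consumers such as cpufreq
 * governors typically sample this counter twice and work with deltas, e.g.:
 *
 *	u64 wall_start, wall_end;
 *	u64 idle_start = get_cpu_idle_time_us(cpu, &wall_start);
 *	... some interval later ...
 *	u64 idle_end = get_cpu_idle_time_us(cpu, &wall_end);
 *	u64 busy_us = (wall_end - wall_start) - (idle_end - idle_start);
 *
 * The -1 return value (NOHZ not active) has to be checked before doing this.
 */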

/**
 * get_cpu_iowait_time_us - get the total iowait time of a CPU
 * @cpu: CPU number to query
 * @last_update_time: variable to store update time in. Do not update
 * counters if NULL.
 *
 * Return the cumulative iowait time (since boot) for a given CPU, in
 * microseconds.
 *
 * This time is measured via accounting rather than sampling, and is as
 * accurate as ktime_get() is.
 *
 * This function returns -1 if NOHZ is not enabled.
 */
u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
{
	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
	ktime_t now, iowait;

	if (!tick_nohz_active)
		return -1;

	now = ktime_get();
	if (last_update_time) {
		update_ts_time_stats(cpu, ts, now, last_update_time);
		iowait = ts->iowait_sleeptime;
	} else {
		if (ts->idle_active && nr_iowait_cpu(cpu) > 0) {
			ktime_t delta = ktime_sub(now, ts->idle_entrytime);

			iowait = ktime_add(ts->iowait_sleeptime, delta);
		} else {
			iowait = ts->iowait_sleeptime;
		}
	}

	return ktime_to_us(iowait);
}
EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);

static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
{
	hrtimer_cancel(&ts->sched_timer);
	hrtimer_set_expires(&ts->sched_timer, ts->last_tick);

	/* Forward the time to expire in the future */
	hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);

	if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
		hrtimer_start_expires(&ts->sched_timer,
				      HRTIMER_MODE_ABS_PINNED_HARD);
	} else {
		tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
	}

	/*
	 * Reset to make sure the next tick stop doesn't get fooled by a past
	 * cached clock deadline.
	 */
	ts->next_tick = 0;
}

static inline bool local_timer_softirq_pending(void)
{
	return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
}

static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
{
	u64 basemono, next_tick, delta, expires;
	unsigned long basejiff;
	unsigned int seq;

	/* Read jiffies and the time when jiffies were updated last */
	do {
		seq = read_seqcount_begin(&jiffies_seq);
		basemono = last_jiffies_update;
		basejiff = jiffies;
	} while (read_seqcount_retry(&jiffies_seq, seq));
	ts->last_jiffies = basejiff;
	ts->timer_expires_base = basemono;

	/*
	 * Keep the periodic tick when RCU, the architecture or irq_work
	 * requests it.
	 *
	 * Aside of that, check whether the local timer softirq is pending.
	 * If so, it is a bad idea to call get_next_timer_interrupt(),
	 * because there is an already expired timer, so it will request
	 * immediate expiry, which rearms the hardware timer with a minimal
	 * delta that brings us straight back to this place.
	 */
	if (rcu_needs_cpu() || arch_needs_cpu() ||
	    irq_work_needs_cpu() || local_timer_softirq_pending()) {
		next_tick = basemono + TICK_NSEC;
	} else {
		/*
		 * Get the next pending timer. If high resolution timers are
		 * enabled this only takes the timer wheel timers into
		 * account. If high resolution timers are disabled this also
		 * looks at the next expiring hrtimer.
		 */
		next_tick = get_next_timer_interrupt(basejiff, basemono);
		ts->next_timer = next_tick;
	}

	/*
	 * If the tick is due in the next period, keep it ticking or
	 * force prod the timer.
	 */
	delta = next_tick - basemono;
	if (delta <= (u64)TICK_NSEC) {
		/*
		 * Tell the timer code that the base is not idle, i.e. undo
		 * the effect of get_next_timer_interrupt():
		 */
		timer_clear_idle();
		/*
		 * We've not stopped the tick yet, and there's a timer in the
		 * next period, so no point in stopping it either, bail.
		 */
		if (!ts->tick_stopped) {
			ts->timer_expires = 0;
			goto out;
		}
	}

	/*
	 * If this CPU is the one which had the do_timer() duty last, we limit
	 * the sleep time to the timekeeping 'max_deferment' value.
	 * Otherwise we can sleep as long as we want.
	 */
	delta = timekeeping_max_deferment();
	if (cpu != tick_do_timer_cpu &&
	    (tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last))
		delta = KTIME_MAX;

	/* Calculate the next expiry time */
	if (delta < (KTIME_MAX - basemono))
		expires = basemono + delta;
	else
		expires = KTIME_MAX;

	ts->timer_expires = min_t(u64, expires, next_tick);

out:
	return ts->timer_expires;
}

static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
{
	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
	u64 basemono = ts->timer_expires_base;
	u64 expires = ts->timer_expires;
	ktime_t tick = expires;

	/* Make sure we won't be trying to stop it twice in a row. */
	ts->timer_expires_base = 0;

	/*
	 * If this CPU is the one which updates jiffies, then give up
	 * the assignment and let it be taken by the CPU which runs
	 * the tick timer next, which might be this CPU as well. If we
	 * don't drop this here, the jiffies might be stale and
	 * do_timer() never gets invoked. Keep track of the fact that it
	 * was the one which had the do_timer() duty last.
	 */
	if (cpu == tick_do_timer_cpu) {
		tick_do_timer_cpu = TICK_DO_TIMER_NONE;
		ts->do_timer_last = 1;
	} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
		ts->do_timer_last = 0;
	}

	/* Skip reprogram of event if it's not changed */
	if (ts->tick_stopped && (expires == ts->next_tick)) {
		/* Sanity check: make sure clockevent is actually programmed */
		if (tick == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer))
			return;

		WARN_ON_ONCE(1);
		printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n",
			    basemono, ts->next_tick, dev->next_event,
			    hrtimer_active(&ts->sched_timer), hrtimer_get_expires(&ts->sched_timer));
	}

	/*
	 * tick_nohz_stop_tick() can be called several times before
	 * tick_nohz_restart_sched_tick() is called. This happens when
	 * interrupts arrive which do not cause a reschedule. In the first
	 * call we save the current tick time, so we can restart the
	 * scheduler tick in tick_nohz_restart_sched_tick().
	 */
	if (!ts->tick_stopped) {
		calc_load_nohz_start();
		quiet_vmstat();

		ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
		ts->tick_stopped = 1;
		trace_tick_stop(1, TICK_DEP_MASK_NONE);
	}

	ts->next_tick = tick;

	/*
	 * If the expiration time == KTIME_MAX, then we simply stop
	 * the tick timer.
	 */
	if (unlikely(expires == KTIME_MAX)) {
		if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
			hrtimer_cancel(&ts->sched_timer);
		else
			tick_program_event(KTIME_MAX, 1);
		return;
	}

	if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
		hrtimer_start(&ts->sched_timer, tick,
			      HRTIMER_MODE_ABS_PINNED_HARD);
	} else {
		hrtimer_set_expires(&ts->sched_timer, tick);
		tick_program_event(tick, 1);
	}
}

static void tick_nohz_retain_tick(struct tick_sched *ts)
{
	ts->timer_expires_base = 0;
}

#ifdef CONFIG_NO_HZ_FULL
static void tick_nohz_stop_sched_tick(struct tick_sched *ts, int cpu)
{
	if (tick_nohz_next_event(ts, cpu))
		tick_nohz_stop_tick(ts, cpu);
	else
		tick_nohz_retain_tick(ts);
}
#endif /* CONFIG_NO_HZ_FULL */

static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
{
	/* Update jiffies first */
	tick_do_update_jiffies64(now);

	/*
	 * Clear the timer idle flag, so we avoid IPIs on remote queueing and
	 * the clock forward checks in the enqueue path:
	 */
	timer_clear_idle();

	calc_load_nohz_stop();
	touch_softlockup_watchdog_sched();

	/* Cancel the scheduled timer and restore the tick: */
	ts->tick_stopped = 0;
	tick_nohz_restart(ts, now);
}

static void __tick_nohz_full_update_tick(struct tick_sched *ts,
					 ktime_t now)
{
#ifdef CONFIG_NO_HZ_FULL
	int cpu = smp_processor_id();

	if (can_stop_full_tick(cpu, ts))
		tick_nohz_stop_sched_tick(ts, cpu);
	else if (ts->tick_stopped)
		tick_nohz_restart_sched_tick(ts, now);
#endif
}

static void tick_nohz_full_update_tick(struct tick_sched *ts)
{
	if (!tick_nohz_full_cpu(smp_processor_id()))
		return;

	if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
		return;

	__tick_nohz_full_update_tick(ts, ktime_get());
}

/*
 * A pending softirq outside an IRQ (or softirq disabled section) context
 * should be waiting for ksoftirqd to handle it, so normally we should not
 * get here: the need_resched() check in can_stop_idle_tick() catches it
 * first.
 *
 * However, between CPUHP_AP_SMPBOOT_THREADS and CPUHP_TEARDOWN_CPU during
 * cpu_down(), softirqs can still be raised while ksoftirqd is parked,
 * triggering the check below, because wakeup_softirqd() is then ignored.
 */
static bool report_idle_softirq(void)
{
	static int ratelimit;
	unsigned int pending = local_softirq_pending();

	if (likely(!pending))
		return false;

	/* Some softirqs claim to be safe against hotplug and ksoftirqd parking */
	if (!cpu_active(smp_processor_id())) {
		pending &= ~SOFTIRQ_HOTPLUG_SAFE_MASK;
		if (!pending)
			return false;
	}

	if (ratelimit >= 10)
		return false;

	/* On RT, softirq handling may be waiting on some lock */
	if (local_bh_blocked())
		return false;

	pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n",
		pending);
	ratelimit++;

	return true;
}

static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
{
	/*
	 * If this CPU is offline and it is the one which updates
	 * jiffies, then give up the assignment and let it be taken by
	 * the CPU which runs the tick timer next. If we don't drop
	 * this here, the jiffies might be stale and do_timer() never
	 * gets invoked.
	 */
	if (unlikely(!cpu_online(cpu))) {
		if (cpu == tick_do_timer_cpu)
			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
		/*
		 * Make sure the CPU doesn't get fooled by an obsolete tick
		 * deadline if it comes back online later.
		 */
		ts->next_tick = 0;
		return false;
	}

	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
		return false;

	if (need_resched())
		return false;

	if (unlikely(report_idle_softirq()))
		return false;

	if (tick_nohz_full_enabled()) {
		/*
		 * Keep the tick alive to guarantee timekeeping progression
		 * if there are full dynticks CPUs around.
		 */
		if (tick_do_timer_cpu == cpu)
			return false;

		/* Should not happen for nohz-full */
		if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
			return false;
	}

	return true;
}

static void __tick_nohz_idle_stop_tick(struct tick_sched *ts)
{
	ktime_t expires;
	int cpu = smp_processor_id();

	/*
	 * If tick_nohz_get_sleep_length() ran tick_nohz_next_event(), the
	 * tick timer expiration time is known already.
	 */
	if (ts->timer_expires_base)
		expires = ts->timer_expires;
	else if (can_stop_idle_tick(cpu, ts))
		expires = tick_nohz_next_event(ts, cpu);
	else
		return;

	ts->idle_calls++;

	if (expires > 0LL) {
		int was_stopped = ts->tick_stopped;

		tick_nohz_stop_tick(ts, cpu);

		ts->idle_sleeps++;
		ts->idle_expires = expires;

		if (!was_stopped && ts->tick_stopped) {
			ts->idle_jiffies = ts->last_jiffies;
			nohz_balance_enter_idle(cpu);
		}
	} else {
		tick_nohz_retain_tick(ts);
	}
}

/**
 * tick_nohz_idle_stop_tick - stop the idle tick from the idle task
 *
 * When the next event is more than a tick into the future, stop the idle tick.
 */
void tick_nohz_idle_stop_tick(void)
{
	__tick_nohz_idle_stop_tick(this_cpu_ptr(&tick_cpu_sched));
}

void tick_nohz_idle_retain_tick(void)
{
	tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched));
	/*
	 * Undo the effect of get_next_timer_interrupt() called from
	 * tick_nohz_next_event().
	 */
	timer_clear_idle();
}

/**
 * tick_nohz_idle_enter - prepare for entering idle on the current CPU
 *
 * Called when we start the idle loop.
 */
void tick_nohz_idle_enter(void)
{
	struct tick_sched *ts;

	lockdep_assert_irqs_enabled();

	local_irq_disable();

	ts = this_cpu_ptr(&tick_cpu_sched);

	WARN_ON_ONCE(ts->timer_expires_base);

	ts->inidle = 1;
	tick_nohz_start_idle(ts);

	local_irq_enable();
}
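
/*
 * Rough shape of the idle-loop usage (a sketch for orientation only; the
 * real caller is the scheduler's idle loop, outside this file):
 *
 *	tick_nohz_idle_enter();
 *	while (!need_resched()) {
 *		tick_nohz_idle_stop_tick();	// or tick_nohz_idle_retain_tick()
 *		... enter an idle state ...
 *	}
 *	tick_nohz_idle_exit();
 */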

/**
 * tick_nohz_irq_exit - update next tick event from interrupt exit
 *
 * When an interrupt fires while we are idle and it doesn't cause a
 * reschedule, it may still add, modify or delete a timer, enqueue an RCU
 * callback, etc... So we need to re-calculate and reprogram the next tick
 * event.
 */
void tick_nohz_irq_exit(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

	if (ts->inidle)
		tick_nohz_start_idle(ts);
	else
		tick_nohz_full_update_tick(ts);
}

/**
 * tick_nohz_idle_got_tick - Check whether or not the tick handler has run
 */
bool tick_nohz_idle_got_tick(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

	if (ts->got_idle_tick) {
		ts->got_idle_tick = 0;
		return true;
	}
	return false;
}

/**
 * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer
 * or the tick, whichever expires first. Note that, if the tick has been
 * stopped, it returns the next hrtimer.
 *
 * Called from power state control code with interrupts disabled.
 */
ktime_t tick_nohz_get_next_hrtimer(void)
{
	return __this_cpu_read(tick_cpu_device.evtdev)->next_event;
}

/**
 * tick_nohz_get_sleep_length - return the expected length of the current sleep
 * @delta_next: duration until the next event if the tick cannot be stopped
 *
 * Called from power state control code with interrupts disabled.
 *
 * The return value of this function and/or the value returned by it through
 * the @delta_next pointer can be negative which must be taken into account by
 * its callers.
 */
ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
{
	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
	int cpu = smp_processor_id();
	/*
	 * The idle entry time is expected to be a sufficient approximation of
	 * the current time at this point.
	 */
	ktime_t now = ts->idle_entrytime;
	ktime_t next_event;

	WARN_ON_ONCE(!ts->inidle);

	*delta_next = ktime_sub(dev->next_event, now);

	if (!can_stop_idle_tick(cpu, ts))
		return *delta_next;

	next_event = tick_nohz_next_event(ts, cpu);
	if (!next_event)
		return *delta_next;

	/*
	 * If the next highres timer to expire is earlier than 'next_event',
	 * the idle governor needs to know that.
	 */
	next_event = min_t(u64, next_event,
			   hrtimer_next_event_without(&ts->sched_timer));

	return ktime_sub(next_event, now);
}
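
/*
 * Illustrative usage (not from this file): a cpuidle governor typically
 * compares the value returned here with the residency requirements of each
 * idle state when deciding how deep to sleep, along the lines of:
 *
 *	ktime_t delta_tick;
 *	s64 sleep_ns = ktime_to_ns(tick_nohz_get_sleep_length(&delta_tick));
 *
 *	// skip states whose exit latency / target residency exceeds sleep_ns,
 *	// and fall back to delta_tick if the tick will not be stopped
 */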

/**
 * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value
 * for a particular CPU.
 * @cpu: target CPU number
 *
 * Called from the schedutil frequency scaling governor in scheduler context.
 */
unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
{
	struct tick_sched *ts = tick_get_tick_sched(cpu);

	return ts->idle_calls;
}

/**
 * tick_nohz_get_idle_calls - return the current idle calls counter value
 *
 * Called from the schedutil frequency scaling governor in scheduler context.
 */
unsigned long tick_nohz_get_idle_calls(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

	return ts->idle_calls;
}

static void tick_nohz_account_idle_time(struct tick_sched *ts,
					ktime_t now)
{
	unsigned long ticks;

	ts->idle_exittime = now;

	if (vtime_accounting_enabled_this_cpu())
		return;

	/*
	 * We stopped the tick in idle. update_process_times() would miss the
	 * time we slept, as it does only a single tick of accounting.
	 * Enforce that this is accounted to idle !
	 */
	ticks = jiffies - ts->idle_jiffies;
	/*
	 * We might be one off. Do not randomly account a huge number of ticks!
	 */
	if (ticks && ticks < LONG_MAX)
		account_idle_ticks(ticks);
}

void tick_nohz_idle_restart_tick(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

	if (ts->tick_stopped) {
		ktime_t now = ktime_get();

		tick_nohz_restart_sched_tick(ts, now);
		tick_nohz_account_idle_time(ts, now);
	}
}

static void tick_nohz_idle_update_tick(struct tick_sched *ts, ktime_t now)
{
	if (tick_nohz_full_cpu(smp_processor_id()))
		__tick_nohz_full_update_tick(ts, now);
	else
		tick_nohz_restart_sched_tick(ts, now);

	tick_nohz_account_idle_time(ts, now);
}

/**
 * tick_nohz_idle_exit - restart the idle tick from the idle task
 *
 * Restart the idle tick when the CPU is woken up from idle.
 */
void tick_nohz_idle_exit(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
	bool idle_active, tick_stopped;
	ktime_t now;

	local_irq_disable();

	WARN_ON_ONCE(!ts->inidle);
	WARN_ON_ONCE(ts->timer_expires_base);

	ts->inidle = 0;
	idle_active = ts->idle_active;
	tick_stopped = ts->tick_stopped;

	if (idle_active || tick_stopped)
		now = ktime_get();

	if (idle_active)
		tick_nohz_stop_idle(ts, now);

	if (tick_stopped)
		tick_nohz_idle_update_tick(ts, now);

	local_irq_enable();
}

/*
 * The nohz low resolution interrupt handler
 */
static void tick_nohz_handler(struct clock_event_device *dev)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
	struct pt_regs *regs = get_irq_regs();
	ktime_t now = ktime_get();

	dev->next_event = KTIME_MAX;

	tick_sched_do_timer(ts, now);
	tick_sched_handle(ts, regs);

	if (unlikely(ts->tick_stopped)) {
		/*
		 * The clockevent device is not reprogrammed, so change the
		 * clock event device to ONESHOT_STOPPED to avoid spurious
		 * interrupts on devices which might not be truly one shot.
		 */
		tick_program_event(KTIME_MAX, 1);
		return;
	}

	hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
	tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
}

static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
{
	if (!tick_nohz_enabled)
		return;
	ts->nohz_mode = mode;
	/* One update is enough */
	if (!test_and_set_bit(0, &tick_nohz_active))
		timers_update_nohz();
}

/**
 * tick_nohz_switch_to_nohz - switch to NOHZ mode
 */
static void tick_nohz_switch_to_nohz(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
	ktime_t next;

	if (!tick_nohz_enabled)
		return;

	if (tick_switch_to_oneshot(tick_nohz_handler))
		return;

	/*
	 * Recycle the hrtimer in 'ts', so we can share the
	 * hrtimer_forward() handling with the highres code.
	 */
	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
	/* Get the next period */
	next = tick_init_jiffy_update();

	hrtimer_set_expires(&ts->sched_timer, next);
	hrtimer_forward_now(&ts->sched_timer, TICK_NSEC);
	tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
	tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
}

static inline void tick_nohz_irq_enter(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
	ktime_t now;

	if (!ts->idle_active && !ts->tick_stopped)
		return;
	now = ktime_get();
	if (ts->idle_active)
		tick_nohz_stop_idle(ts, now);
	/*
	 * If all CPUs are idle we may need to update a stale jiffies value.
	 * Note nohz_full is a special case: a timekeeper is guaranteed to
	 * stay alive, but it might be busy looping with interrupts disabled
	 * in some rare case (typically stop machine). So we must make sure
	 * we have a last resort.
	 */
	if (ts->tick_stopped)
		tick_nohz_update_jiffies(now);
}

#else

static inline void tick_nohz_switch_to_nohz(void) { }
static inline void tick_nohz_irq_enter(void) { }
static inline void tick_nohz_activate(struct tick_sched *ts, int mode) { }

#endif /* CONFIG_NO_HZ_COMMON */

/*
 * Called from irq_enter() to notify about the possible interruption of idle()
 */
void tick_irq_enter(void)
{
	tick_check_oneshot_broadcast_this_cpu();
	tick_nohz_irq_enter();
}

/*
 * High resolution timer specific code
 */
#ifdef CONFIG_HIGH_RES_TIMERS
/*
 * We rearm the timer until we get disabled by the idle code.
 * Called with interrupts disabled.
 */
static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
{
	struct tick_sched *ts =
		container_of(timer, struct tick_sched, sched_timer);
	struct pt_regs *regs = get_irq_regs();
	ktime_t now = ktime_get();

	tick_sched_do_timer(ts, now);

	/*
	 * Do not call when we are not in IRQ context and have
	 * no valid 'regs' pointer.
	 */
	if (regs)
		tick_sched_handle(ts, regs);
	else
		ts->next_tick = 0;

	/* No need to reprogram if we are in idle or full dynticks mode */
	if (unlikely(ts->tick_stopped))
		return HRTIMER_NORESTART;

	hrtimer_forward(timer, now, TICK_NSEC);

	return HRTIMER_RESTART;
}

static int sched_skew_tick;

static int __init skew_tick(char *str)
{
	get_option(&str, &sched_skew_tick);

	return 0;
}
early_param("skew_tick", skew_tick);
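
/*
 * Example (command line, not code): booting with "skew_tick=1" staggers the
 * per-CPU tick expiry (see the offset computation below), which reduces
 * contention on jiffies_lock on systems with many CPUs, at the cost of the
 * ticks no longer firing simultaneously.
 */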

/**
 * tick_setup_sched_timer - setup the tick emulation timer
 */
void tick_setup_sched_timer(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
	ktime_t now = ktime_get();

	/*
	 * Emulate tick processing via per-CPU hrtimers:
	 */
	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
	ts->sched_timer.function = tick_sched_timer;

	/* Get the next period (per-CPU) */
	hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());

	/* Offset the tick to avert 'jiffies_lock' contention. */
	if (sched_skew_tick) {
		u64 offset = TICK_NSEC >> 1;
		do_div(offset, num_possible_cpus());
		offset *= smp_processor_id();
		hrtimer_add_expires_ns(&ts->sched_timer, offset);
	}

	hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
	hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD);
	tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
}
#endif /* CONFIG_HIGH_RES_TIMERS */

#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
void tick_cancel_sched_timer(int cpu)
{
	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);

# ifdef CONFIG_HIGH_RES_TIMERS
	if (ts->sched_timer.base)
		hrtimer_cancel(&ts->sched_timer);
# endif

	memset(ts, 0, sizeof(*ts));
}
#endif

/*
 * Async notification about clocksource changes
 */
void tick_clock_notify(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		set_bit(0, &per_cpu(tick_cpu_sched, cpu).check_clocks);
}

/*
 * Async notification about clock event changes
 */
void tick_oneshot_notify(void)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

	set_bit(0, &ts->check_clocks);
}

/*
 * Check if a change happened which makes oneshot possible.
 *
 * Called cyclically from the hrtimer softirq (driven by the timer softirq).
 * 'allow_nohz' signals that we can switch into low-res NOHZ mode, because
 * high resolution timers are disabled (either at compile time or at
 * runtime). Called with interrupts disabled.
 */
int tick_check_oneshot_change(int allow_nohz)
{
	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

	if (!test_and_clear_bit(0, &ts->check_clocks))
		return 0;

	if (ts->nohz_mode != NOHZ_MODE_INACTIVE)
		return 0;

	if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available())
		return 0;

	if (!allow_nohz)
		return 1;

	tick_nohz_switch_to_nohz();
	return 0;
}