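// SPDX-License-Identifier: GPL-2.0
/*
 * Implement CPU time clocks for the POSIX clock interface.
 */
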
#include <linux/sched/signal.h>
#include <linux/sched/cputime.h>
#include <linux/posix-timers.h>
#include <linux/errno.h>
#include <linux/math64.h>
#include <linux/uaccess.h>
#include <linux/kernel_stat.h>
#include <trace/events/timer.h>
#include <linux/tick.h>
#include <linux/workqueue.h>
#include <linux/compat.h>
#include <linux/sched/deadline.h>
#include <linux/task_work.h>

#include "posix-timers.h"

static void posix_cpu_timer_rearm(struct k_itimer *timer);

/*
 * Initialize the group (process wide) posix_cputimers. If RLIMIT_CPU is
 * finite, prime the PROF expiry cache with it so the tick starts
 * checking the limit.
 */
void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit)
{
	posix_cputimers_init(pct);
	if (cpu_limit != RLIM_INFINITY) {
		pct->bases[CPUCLOCK_PROF].nextevt = cpu_limit * NSEC_PER_SEC;
		pct->timers_active = true;
	}
}

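/*
 * Called after updating RLIMIT_CPU to run the cpu timer and update
 * the expiry cache if necessary. Needs siglock protection, hence the
 * lock_task_sighand() dance below.
 *
 * Returns 0 on success, -ESRCH when the task is already exiting and
 * its sighand can no longer be locked.
 */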
int update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
{
	u64 nsecs = rlim_new * NSEC_PER_SEC;
	unsigned long irq_fl;

	if (!lock_task_sighand(task, &irq_fl))
		return -ESRCH;
	set_process_cpu_timer(task, CPUCLOCK_PROF, &nsecs, NULL);
	unlock_task_sighand(task, &irq_fl);
	return 0;
}

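/*
 * Functions for validating access to tasks. Translate the PID encoded in
 * the clockid into a struct pid, or return NULL if the clock does not
 * reference a valid, accessible task or process. Must be called with
 * rcu_read_lock() held.
 */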
static struct pid *pid_for_clock(const clockid_t clock, bool gettime)
{
	const bool thread = !!CPUCLOCK_PERTHREAD(clock);
	const pid_t upid = CPUCLOCK_PID(clock);
	struct pid *pid;

	if (CPUCLOCK_WHICH(clock) >= CPUCLOCK_MAX)
		return NULL;

	/*
	 * If the encoded PID is 0, then the timer is targeted at current
	 * or the process to which current belongs.
	 */
	if (upid == 0)
		return thread ? task_pid(current) : task_tgid(current);

	pid = find_vpid(upid);
	if (!pid)
		return NULL;

	if (thread) {
		struct task_struct *tsk = pid_task(pid, PIDTYPE_PID);
		return (tsk && same_thread_group(tsk, current)) ? pid : NULL;
	}

	/*
	 * For clock_gettime(PROCESS) allow finding the process by the PID
	 * of the current task. The TGID is needed so that
	 * pid_task(pid, PIDTYPE_TGID) can find the process.
	 */
	if (gettime && (pid == task_pid(current)))
		return task_tgid(current);

	/* For process clocks the PID must identify a process. */
	return pid_has_task(pid, PIDTYPE_TGID) ? pid : NULL;
}

static inline int validate_clock_permissions(const clockid_t clock)
{
	int ret;

	rcu_read_lock();
	ret = pid_for_clock(clock, false) ? 0 : -EINVAL;
	rcu_read_unlock();

	return ret;
}

static inline enum pid_type clock_pid_type(const clockid_t clock)
{
	return CPUCLOCK_PERTHREAD(clock) ? PIDTYPE_PID : PIDTYPE_TGID;
}

static inline struct task_struct *cpu_timer_task_rcu(struct k_itimer *timer)
{
	return pid_task(timer->it.cpu.pid, clock_pid_type(timer->it_clock));
}

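/*
 * Update expiry time from increment, and increase overrun count,
 * given the current clock sample.
 */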
static u64 bump_cpu_timer(struct k_itimer *timer, u64 now)
{
	u64 delta, incr, expires = timer->it.cpu.node.expires;
	int i;

	if (!timer->it_interval)
		return expires;

	if (now < expires)
		return expires;

	incr = timer->it_interval;
	delta = now + incr - expires;

	/* Don't use (incr*2 < delta), incr*2 might overflow. */
	for (i = 0; incr < delta - incr; i++)
		incr = incr << 1;

	for (; i >= 0; incr >>= 1, i--) {
		if (delta < incr)
			continue;

		timer->it.cpu.node.expires += incr;
		timer->it_overrun += 1LL << i;
		delta -= incr;
	}
	return timer->it.cpu.node.expires;
}

/* Check whether all cache entries contain U64_MAX, i.e. eternal expiry time */
static inline bool expiry_cache_is_inactive(const struct posix_cputimers *pct)
{
	return !(~pct->bases[CPUCLOCK_PROF].nextevt |
		 ~pct->bases[CPUCLOCK_VIRT].nextevt |
		 ~pct->bases[CPUCLOCK_SCHED].nextevt);
}

static int
posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp)
{
	int error = validate_clock_permissions(which_clock);

	if (!error) {
		tp->tv_sec = 0;
		tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ);
		if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
			/*
			 * If sched_clock is using a cycle counter, we
			 * don't have any idea of its true resolution
			 * exported, but it is much more than 1s/HZ.
			 */
			tp->tv_nsec = 1;
		}
	}
	return error;
}

static int
posix_cpu_clock_set(const clockid_t clock, const struct timespec64 *tp)
{
	int error = validate_clock_permissions(clock);

	/*
	 * You can never reset a CPU clock, but we check for other errors
	 * in the call before failing with EPERM.
	 */
	return error ? : -EPERM;
}

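/*
 * Sample a per-thread clock for the given task. clkid is validated.
 */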
static u64 cpu_clock_sample(const clockid_t clkid, struct task_struct *p)
{
	u64 utime, stime;

	if (clkid == CPUCLOCK_SCHED)
		return task_sched_runtime(p);

	task_cputime(p, &utime, &stime);

	switch (clkid) {
	case CPUCLOCK_PROF:
		return utime + stime;
	case CPUCLOCK_VIRT:
		return utime;
	default:
		WARN_ON_ONCE(1);
	}
	return 0;
}

static inline void store_samples(u64 *samples, u64 stime, u64 utime, u64 rtime)
{
	samples[CPUCLOCK_PROF] = stime + utime;
	samples[CPUCLOCK_VIRT] = utime;
	samples[CPUCLOCK_SCHED] = rtime;
}

static void task_sample_cputime(struct task_struct *p, u64 *samples)
{
	u64 stime, utime;

	task_cputime(p, &utime, &stime);
	store_samples(samples, stime, utime, p->se.sum_exec_runtime);
}

static void proc_sample_cputime_atomic(struct task_cputime_atomic *at,
				       u64 *samples)
{
	u64 stime, utime, rtime;

	utime = atomic64_read(&at->utime);
	stime = atomic64_read(&at->stime);
	rtime = atomic64_read(&at->sum_exec_runtime);
	store_samples(samples, stime, utime, rtime);
}

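/*
 * Set cputime to sum_cputime if sum_cputime > cputime. Use cmpxchg
 * to avoid races with concurrent updates to cputime.
 */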
static inline void __update_gt_cputime(atomic64_t *cputime, u64 sum_cputime)
{
	u64 curr_cputime;
retry:
	curr_cputime = atomic64_read(cputime);
	if (sum_cputime > curr_cputime) {
		if (atomic64_cmpxchg(cputime, curr_cputime, sum_cputime) != curr_cputime)
			goto retry;
	}
}

static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic,
			      struct task_cputime *sum)
{
	__update_gt_cputime(&cputime_atomic->utime, sum->utime);
	__update_gt_cputime(&cputime_atomic->stime, sum->stime);
	__update_gt_cputime(&cputime_atomic->sum_exec_runtime, sum->sum_exec_runtime);
}

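/**
 * thread_group_sample_cputime - Sample cputime for a given task
 * @tsk:	Task for which cputime needs to be sampled
 * @samples:	Storage for time samples
 *
 * Called from sys_getitimer() to calculate the expiry time of an active
 * timer. That means group cputime accounting is already active. Called
 * with task sighand lock held.
 *
 * Updates @samples with an up to date sample of the thread group cputimes.
 */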
void thread_group_sample_cputime(struct task_struct *tsk, u64 *samples)
{
	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
	struct posix_cputimers *pct = &tsk->signal->posix_cputimers;

	WARN_ON_ONCE(!pct->timers_active);

	proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);
}

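/**
 * thread_group_start_cputime - Start cputime accounting and return a sample
 * @tsk:	Task for which cputime needs to be started
 * @samples:	Storage for time samples
 *
 * The thread group cputime accounting is avoided when there are no posix
 * CPU timers armed. Before starting a timer it's required to check whether
 * the time accounting is active. If not, a full update of the atomic
 * accounting store needs to be done and the accounting enabled.
 *
 * Updates @samples with an up to date sample of the thread group cputimes.
 */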
static void thread_group_start_cputime(struct task_struct *tsk, u64 *samples)
{
	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
	struct posix_cputimers *pct = &tsk->signal->posix_cputimers;

	lockdep_assert_task_sighand_held(tsk);

	/* Check if cputimer isn't running. This is accessed without locking. */
	if (!READ_ONCE(pct->timers_active)) {
		struct task_cputime sum;

		/*
		 * The POSIX timer interface allows for absolute time expiry
		 * values through the TIMER_ABSTIME flag, therefore we have
		 * to synchronize the timer to the clock every time we start it.
		 */
		thread_group_cputime(tsk, &sum);
		update_gt_cputime(&cputimer->cputime_atomic, &sum);

		/*
		 * We're setting timers_active without a lock. Ensure this
		 * only gets written to in one operation. We set it after
		 * update_gt_cputime() as a small optimization, but barriers
		 * are not required because update_gt_cputime() can handle
		 * concurrent updates.
		 */
		WRITE_ONCE(pct->timers_active, true);
	}
	proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);
}

static void __thread_group_cputime(struct task_struct *tsk, u64 *samples)
{
	struct task_cputime ct;

	thread_group_cputime(tsk, &ct);
	store_samples(samples, ct.stime, ct.utime, ct.sum_exec_runtime);
}

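/*
 * Sample a process (thread group) clock for the given task clkid. If the
 * group's cputime accounting is already enabled, read the atomic store.
 * Otherwise a full update is required. clkid is already validated.
 */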
static u64 cpu_clock_sample_group(const clockid_t clkid, struct task_struct *p,
				  bool start)
{
	struct thread_group_cputimer *cputimer = &p->signal->cputimer;
	struct posix_cputimers *pct = &p->signal->posix_cputimers;
	u64 samples[CPUCLOCK_MAX];

	if (!READ_ONCE(pct->timers_active)) {
		if (start)
			thread_group_start_cputime(p, samples);
		else
			__thread_group_cputime(p, samples);
	} else {
		proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);
	}

	return samples[clkid];
}

static int posix_cpu_clock_get(const clockid_t clock, struct timespec64 *tp)
{
	const clockid_t clkid = CPUCLOCK_WHICH(clock);
	struct task_struct *tsk;
	u64 t;

	rcu_read_lock();
	tsk = pid_task(pid_for_clock(clock, true), clock_pid_type(clock));
	if (!tsk) {
		rcu_read_unlock();
		return -EINVAL;
	}

	if (CPUCLOCK_PERTHREAD(clock))
		t = cpu_clock_sample(clkid, tsk);
	else
		t = cpu_clock_sample_group(clkid, tsk, false);
	rcu_read_unlock();

	*tp = ns_to_timespec64(t);
	return 0;
}

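/*
 * Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
 * This is called from sys_timer_create() and do_cpu_nanosleep() with the
 * new timer already all-zeros initialized.
 */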
static int posix_cpu_timer_create(struct k_itimer *new_timer)
{
	static struct lock_class_key posix_cpu_timers_key;
	struct pid *pid;

	rcu_read_lock();
	pid = pid_for_clock(new_timer->it_clock, false);
	if (!pid) {
		rcu_read_unlock();
		return -EINVAL;
	}

	/*
	 * If posix timer expiry is handled in task work context then
	 * timer::it_lock can be taken without disabling interrupts as all
	 * other locking happens in task context. This requires a separate
	 * lock class key otherwise regular posix timer expiry would record
	 * the lock class being taken in interrupt context and generate a
	 * false positive warning.
	 */
	if (IS_ENABLED(CONFIG_POSIX_CPU_TIMERS_TASK_WORK))
		lockdep_set_class(&new_timer->it_lock, &posix_cpu_timers_key);

	new_timer->kclock = &clock_posix_cpu;
	timerqueue_init(&new_timer->it.cpu.node);
	new_timer->it.cpu.pid = get_pid(pid);
	rcu_read_unlock();
	return 0;
}

static struct posix_cputimer_base *timer_base(struct k_itimer *timer,
					      struct task_struct *tsk)
{
	int clkidx = CPUCLOCK_WHICH(timer->it_clock);

	if (CPUCLOCK_PERTHREAD(timer->it_clock))
		return tsk->posix_cputimers.bases + clkidx;
	else
		return tsk->signal->posix_cputimers.bases + clkidx;
}

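/*
 * Force recalculating the base earliest expiration on the next tick.
 * This will also re-evaluate the need to keep around the process wide
 * cputime counter and tick dependency and eventually shut these down
 * if necessary.
 */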
static void trigger_base_recalc_expires(struct k_itimer *timer,
					struct task_struct *tsk)
{
	struct posix_cputimer_base *base = timer_base(timer, tsk);

	base->nextevt = 0;
}

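/*
 * Dequeue the timer and reset the base if it was its earliest expiration.
 * It makes sure the next tick recalculates the base next expiration so we
 * don't keep the costly process wide cputime counter around for a random
 * amount of time, along with the tick dependency.
 *
 * If another timer gets queued between this and the next tick, its
 * expiration will update the base next event if necessary on the next
 * tick.
 */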
static void disarm_timer(struct k_itimer *timer, struct task_struct *p)
{
	struct cpu_timer *ctmr = &timer->it.cpu;
	struct posix_cputimer_base *base;

	if (!cpu_timer_dequeue(ctmr))
		return;

	base = timer_base(timer, p);
	if (cpu_timer_getexpires(ctmr) == base->nextevt)
		trigger_base_recalc_expires(timer, p);
}

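/*
 * Clean up a CPU-clock timer that is about to be destroyed.
 * This is called from timer deletion with the timer already locked.
 * If we return TIMER_RETRY, it's necessary to release the timer's lock
 * and try again. (This happens when the timer is in the middle of firing.)
 */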
static int posix_cpu_timer_del(struct k_itimer *timer)
{
	struct cpu_timer *ctmr = &timer->it.cpu;
	struct sighand_struct *sighand;
	struct task_struct *p;
	unsigned long flags;
	int ret = 0;

	rcu_read_lock();
	p = cpu_timer_task_rcu(timer);
	if (!p)
		goto out;

	/*
	 * Protect against sighand release/switch in exit/exec and process/
	 * thread timer list entry concurrent read/writes.
	 */
	sighand = lock_task_sighand(p, &flags);
	if (unlikely(sighand == NULL)) {
		/*
		 * This raced with the reaping of the task. The exit cleanup
		 * should have removed this timer from the timer queue.
		 */
		WARN_ON_ONCE(ctmr->head || timerqueue_node_queued(&ctmr->node));
	} else {
		if (timer->it.cpu.firing)
			ret = TIMER_RETRY;
		else
			disarm_timer(timer, p);

		unlock_task_sighand(p, &flags);
	}

out:
	rcu_read_unlock();
	if (!ret)
		put_pid(ctmr->pid);

	return ret;
}

static void cleanup_timerqueue(struct timerqueue_head *head)
{
	struct timerqueue_node *node;
	struct cpu_timer *ctmr;

	while ((node = timerqueue_getnext(head))) {
		timerqueue_del(head, node);
		ctmr = container_of(node, struct cpu_timer, node);
		ctmr->head = NULL;
	}
}

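/*
 * Clean out CPU timers which are still armed when a thread exits. The
 * timers are only removed from the queue; no further accounting is done.
 * This must be called with the siglock held.
 */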
static void cleanup_timers(struct posix_cputimers *pct)
{
	cleanup_timerqueue(&pct->bases[CPUCLOCK_PROF].tqhead);
	cleanup_timerqueue(&pct->bases[CPUCLOCK_VIRT].tqhead);
	cleanup_timerqueue(&pct->bases[CPUCLOCK_SCHED].tqhead);
}

/*
 * These are both called with the siglock held, when the current thread
 * is being reaped. When the final (leader) thread in the group is reaped,
 * posix_cpu_timers_exit_group() is called after posix_cpu_timers_exit().
 */
void posix_cpu_timers_exit(struct task_struct *tsk)
{
	cleanup_timers(&tsk->posix_cputimers);
}
void posix_cpu_timers_exit_group(struct task_struct *tsk)
{
	cleanup_timers(&tsk->signal->posix_cputimers);
}

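/*
 * Insert the timer on the appropriate list before any timers that
 * expire later. This must be called with the sighand lock held.
 */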
static void arm_timer(struct k_itimer *timer, struct task_struct *p)
{
	struct posix_cputimer_base *base = timer_base(timer, p);
	struct cpu_timer *ctmr = &timer->it.cpu;
	u64 newexp = cpu_timer_getexpires(ctmr);

	if (!cpu_timer_enqueue(&base->tqhead, ctmr))
		return;

	/*
	 * We are the new earliest-expiring POSIX 1.b timer, hence
	 * need to update expiration cache. Take into account that
	 * for process timers we share expiration cache with itimers
	 * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
	 */
	if (newexp < base->nextevt)
		base->nextevt = newexp;

	if (CPUCLOCK_PERTHREAD(timer->it_clock))
		tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
	else
		tick_dep_set_signal(p, TICK_DEP_BIT_POSIX_TIMER);
}

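/*
 * The timer is locked, fire it and arrange for its reload.
 */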
static void cpu_timer_fire(struct k_itimer *timer)
{
	struct cpu_timer *ctmr = &timer->it.cpu;

	if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
		/*
		 * The user does not want any signal.
		 */
		cpu_timer_setexpires(ctmr, 0);
	} else if (unlikely(timer->sigq == NULL)) {
		/*
		 * This is a special case for clock_nanosleep,
		 * not a normal timer from sys_timer_create.
		 */
		wake_up_process(timer->it_process);
		cpu_timer_setexpires(ctmr, 0);
	} else if (!timer->it_interval) {
		/*
		 * One-shot timer. Just poke the signal.
		 */
		posix_timer_event(timer, 0);
		cpu_timer_setexpires(ctmr, 0);
	} else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
		/*
		 * The signal did not get queued because the signal
		 * was ignored, so we won't get any callback to
		 * reload the timer. But we need to keep it
		 * ticking in case the signal is deliverable next time.
		 */
		posix_cpu_timer_rearm(timer);
		++timer->it_requeue_pending;
	}
}

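/*
 * Guts of sys_timer_settime for CPU timers.
 * This is called with the timer locked and interrupts disabled.
 * If we return TIMER_RETRY, it's necessary to release the timer's lock
 * and try again. (This happens when the timer is in the middle of firing.)
 */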
static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
			       struct itimerspec64 *new, struct itimerspec64 *old)
{
	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
	u64 old_expires, new_expires, old_incr, val;
	struct cpu_timer *ctmr = &timer->it.cpu;
	struct sighand_struct *sighand;
	struct task_struct *p;
	unsigned long flags;
	int ret = 0;

	rcu_read_lock();
	p = cpu_timer_task_rcu(timer);
	if (!p) {
		/*
		 * If p has just been reaped, we can no
		 * longer get any information about it at all.
		 */
		rcu_read_unlock();
		return -ESRCH;
	}

	/*
	 * Use the to_ktime conversion because that clamps the maximum
	 * value to KTIME_MAX and avoids multiplication overflows.
	 */
	new_expires = ktime_to_ns(timespec64_to_ktime(new->it_value));

	/*
	 * Protect against sighand release/switch in exit/exec and
	 * against concurrent timer list read/writes in arm_timer().
	 */
	sighand = lock_task_sighand(p, &flags);
	if (unlikely(sighand == NULL)) {
		/*
		 * The task has just been reaped; nothing more to do.
		 */
		rcu_read_unlock();
		return -ESRCH;
	}

	/*
	 * Disarm any old timer after extracting its expiry time.
	 */
	old_incr = timer->it_interval;
	old_expires = cpu_timer_getexpires(ctmr);

	if (unlikely(timer->it.cpu.firing)) {
		timer->it.cpu.firing = -1;
		ret = TIMER_RETRY;
	} else {
		cpu_timer_dequeue(ctmr);
	}

	/*
	 * We need to sample the current time to convert the new value
	 * from relative to absolute and the old value from absolute to
	 * relative. To set a process timer, we also need a sample to
	 * balance the thread expiry times (in arm_timer). With an
	 * absolute time, we must check whether it has already passed.
	 * In short, we need a sample.
	 */
	if (CPUCLOCK_PERTHREAD(timer->it_clock))
		val = cpu_clock_sample(clkid, p);
	else
		val = cpu_clock_sample_group(clkid, p, true);

	if (old) {
		if (old_expires == 0) {
			old->it_value.tv_sec = 0;
			old->it_value.tv_nsec = 0;
		} else {
			/*
			 * Update the timer in case it has overrun already.
			 * If it has, we'll report it as having overrun and
			 * with the next reloaded timer already ticking,
			 * though we are swallowing that pending
			 * notification here to install the new setting.
			 */
			u64 exp = bump_cpu_timer(timer, val);

			if (val < exp) {
				old_expires = exp - val;
				old->it_value = ns_to_timespec64(old_expires);
			} else {
				old->it_value.tv_nsec = 1;
				old->it_value.tv_sec = 0;
			}
		}
	}

	if (unlikely(ret)) {
		/*
		 * We are colliding with the timer actually firing.
		 * Punt after filling in the timer's old value, and
		 * disable this firing since we are already reporting
		 * it as an overrun (thanks to bump_cpu_timer above).
		 */
		unlock_task_sighand(p, &flags);
		goto out;
	}

	if (new_expires != 0 && !(timer_flags & TIMER_ABSTIME)) {
		new_expires += val;
	}

	/*
	 * Install the new expiry time (or zero).
	 * For a timer with no notification action, we don't actually
	 * arm the timer (we'll just fake it for timer_gettime).
	 */
	cpu_timer_setexpires(ctmr, new_expires);
	if (new_expires != 0 && val < new_expires) {
		arm_timer(timer, p);
	}

	unlock_task_sighand(p, &flags);

	/*
	 * Install the new reload setting, and
	 * set up the signal and overrun bookkeeping.
	 */
	timer->it_interval = timespec64_to_ktime(new->it_interval);

	/*
	 * This acts as a modification timestamp for the timer,
	 * so any automatic reload attempt will punt on seeing
	 * that we have reset the timer manually.
	 */
	timer->it_requeue_pending = (timer->it_requeue_pending + 2) &
		~REQUEUE_PENDING;
	timer->it_overrun_last = 0;
	timer->it_overrun = -1;

	if (val >= new_expires) {
		if (new_expires != 0) {
			/*
			 * The designated time already passed, so we notify
			 * immediately, even if the thread never runs to
			 * accumulate more time on this clock.
			 */
			cpu_timer_fire(timer);
		}

		/*
		 * Make sure we don't keep around the process wide cputime
		 * counter or the tick dependency if they are not necessary.
		 */
		sighand = lock_task_sighand(p, &flags);
		if (!sighand)
			goto out;

		if (!cpu_timer_queued(ctmr))
			trigger_base_recalc_expires(timer, p);

		unlock_task_sighand(p, &flags);
	}
out:
	rcu_read_unlock();
	if (old)
		old->it_interval = ns_to_timespec64(old_incr);

	return ret;
}

static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp)
{
	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
	struct cpu_timer *ctmr = &timer->it.cpu;
	u64 now, expires = cpu_timer_getexpires(ctmr);
	struct task_struct *p;

	rcu_read_lock();
	p = cpu_timer_task_rcu(timer);
	if (!p)
		goto out;

	/*
	 * Easy part: convert the reload time.
	 */
	itp->it_interval = ktime_to_timespec64(timer->it_interval);

	if (!expires)
		goto out;

	/*
	 * Sample the clock to take the difference with the expiry time.
	 */
	if (CPUCLOCK_PERTHREAD(timer->it_clock))
		now = cpu_clock_sample(clkid, p);
	else
		now = cpu_clock_sample_group(clkid, p, false);

	if (now < expires) {
		itp->it_value = ns_to_timespec64(expires - now);
	} else {
		/*
		 * The timer should have expired already, but the firing
		 * hasn't taken place yet. Say it's just about to expire.
		 */
		itp->it_value.tv_nsec = 1;
		itp->it_value.tv_sec = 0;
	}
out:
	rcu_read_unlock();
}

#define MAX_COLLECTED	20

static u64 collect_timerqueue(struct timerqueue_head *head,
			      struct list_head *firing, u64 now)
{
	struct timerqueue_node *next;
	int i = 0;

	while ((next = timerqueue_getnext(head))) {
		struct cpu_timer *ctmr;
		u64 expires;

		ctmr = container_of(next, struct cpu_timer, node);
		expires = cpu_timer_getexpires(ctmr);
		/* Limit the number of timers to expire at once */
		if (++i == MAX_COLLECTED || now < expires)
			return expires;

		ctmr->firing = 1;
		cpu_timer_dequeue(ctmr);
		list_add_tail(&ctmr->elist, firing);
	}

	return U64_MAX;
}

static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples,
				    struct list_head *firing)
{
	struct posix_cputimer_base *base = pct->bases;
	int i;

	for (i = 0; i < CPUCLOCK_MAX; i++, base++) {
		base->nextevt = collect_timerqueue(&base->tqhead, firing,
						   samples[i]);
	}
}

static inline void check_dl_overrun(struct task_struct *tsk)
{
	if (tsk->dl.dl_overrun) {
		tsk->dl.dl_overrun = 0;
		send_signal_locked(SIGXCPU, SEND_SIG_PRIV, tsk, PIDTYPE_TGID);
	}
}

static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard)
{
	if (time < limit)
		return false;

	if (print_fatal_signals) {
		pr_info("%s Watchdog Timeout (%s): %s[%d]\n",
			rt ? "RT" : "CPU", hard ? "hard" : "soft",
			current->comm, task_pid_nr(current));
	}
	send_signal_locked(signo, SEND_SIG_PRIV, current, PIDTYPE_TGID);
	return true;
}

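/*
 * Check for any per-thread CPU timers that have fired and move them off
 * the task's timer queues onto the firing list. Here we update the
 * tsk->posix_cputimers.bases[clk].nextevt expiry cache.
 */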
static void check_thread_timers(struct task_struct *tsk,
				struct list_head *firing)
{
	struct posix_cputimers *pct = &tsk->posix_cputimers;
	u64 samples[CPUCLOCK_MAX];
	unsigned long soft;

	if (dl_task(tsk))
		check_dl_overrun(tsk);

	if (expiry_cache_is_inactive(pct))
		return;

	task_sample_cputime(tsk, samples);
	collect_posix_cputimers(pct, samples, firing);

	/*
	 * Check for the special case thread timers.
	 */
	soft = task_rlimit(tsk, RLIMIT_RTTIME);
	if (soft != RLIM_INFINITY) {
		/* The task RT timeout is accounted in jiffies. RTTIME is usec */
		unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ);
		unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME);

		/* At the hard limit, send SIGKILL. No further action. */
		if (hard != RLIM_INFINITY &&
		    check_rlimit(rttime, hard, SIGKILL, true, true))
			return;

		/* At the soft limit, send a SIGXCPU every second */
		if (check_rlimit(rttime, soft, SIGXCPU, true, false)) {
			soft += USEC_PER_SEC;
			tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur = soft;
		}
	}

	if (expiry_cache_is_inactive(pct))
		tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
}

static inline void stop_process_timers(struct signal_struct *sig)
{
	struct posix_cputimers *pct = &sig->posix_cputimers;

	/* Turn off the active flag. This is done without locking. */
	WRITE_ONCE(pct->timers_active, false);
	tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER);
}

static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
			     u64 *expires, u64 cur_time, int signo)
{
	if (!it->expires)
		return;

	if (cur_time >= it->expires) {
		if (it->incr)
			it->expires += it->incr;
		else
			it->expires = 0;

		trace_itimer_expire(signo == SIGPROF ?
				    ITIMER_PROF : ITIMER_VIRTUAL,
				    task_tgid(tsk), cur_time);
		send_signal_locked(signo, SEND_SIG_PRIV, tsk, PIDTYPE_TGID);
	}

	if (it->expires && it->expires < *expires)
		*expires = it->expires;
}

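/*
 * Check for any per-process CPU timers (POSIX 1.b, itimers, RLIMIT_CPU)
 * that have fired and move them onto the firing list. Per-thread timers
 * have already been collected in check_thread_timers().
 */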
static void check_process_timers(struct task_struct *tsk,
				 struct list_head *firing)
{
	struct signal_struct *const sig = tsk->signal;
	struct posix_cputimers *pct = &sig->posix_cputimers;
	u64 samples[CPUCLOCK_MAX];
	unsigned long soft;

	/*
	 * If there are no active process wide timers (POSIX 1.b, itimers,
	 * RLIMIT_CPU) there is nothing to check. Also skip the process wide
	 * timer processing when expiry checking is already active.
	 */
	if (!READ_ONCE(pct->timers_active) || pct->expiry_active)
		return;

	/*
	 * Signify that a thread is checking for process timers.
	 * Write access to this field is protected by the sighand lock.
	 */
	pct->expiry_active = true;

	/*
	 * Collect the current process totals. Group accounting is active
	 * so the sample can be taken directly.
	 */
	proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic, samples);
	collect_posix_cputimers(pct, samples, firing);

	/*
	 * Check for the special case process timers.
	 */
	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF],
			 &pct->bases[CPUCLOCK_PROF].nextevt,
			 samples[CPUCLOCK_PROF], SIGPROF);
	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT],
			 &pct->bases[CPUCLOCK_VIRT].nextevt,
			 samples[CPUCLOCK_VIRT], SIGVTALRM);

	soft = task_rlimit(tsk, RLIMIT_CPU);
	if (soft != RLIM_INFINITY) {
		/* RLIMIT_CPU is in seconds. Samples are nanoseconds */
		unsigned long hard = task_rlimit_max(tsk, RLIMIT_CPU);
		u64 ptime = samples[CPUCLOCK_PROF];
		u64 softns = (u64)soft * NSEC_PER_SEC;
		u64 hardns = (u64)hard * NSEC_PER_SEC;

		/* At the hard limit, send SIGKILL. No further action. */
		if (hard != RLIM_INFINITY &&
		    check_rlimit(ptime, hardns, SIGKILL, false, true))
			return;

		/* At the soft limit, send a SIGXCPU every second */
		if (check_rlimit(ptime, softns, SIGXCPU, false, false)) {
			sig->rlim[RLIMIT_CPU].rlim_cur = soft + 1;
			softns += NSEC_PER_SEC;
		}

		/* Update the expiry cache */
		if (softns < pct->bases[CPUCLOCK_PROF].nextevt)
			pct->bases[CPUCLOCK_PROF].nextevt = softns;
	}

	if (expiry_cache_is_inactive(pct))
		stop_process_timers(sig);

	pct->expiry_active = false;
}

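/*
 * This is called from the signal code (via posixtimer_rearm)
 * when the last timer signal was delivered and we have to reload the timer.
 */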
static void posix_cpu_timer_rearm(struct k_itimer *timer)
{
	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
	struct task_struct *p;
	struct sighand_struct *sighand;
	unsigned long flags;
	u64 now;

	rcu_read_lock();
	p = cpu_timer_task_rcu(timer);
	if (!p)
		goto out;

	/* Protect timer list r/w in arm_timer() */
	sighand = lock_task_sighand(p, &flags);
	if (unlikely(sighand == NULL))
		goto out;

	/*
	 * Fetch the current sample and update the timer's expiry time.
	 */
	if (CPUCLOCK_PERTHREAD(timer->it_clock))
		now = cpu_clock_sample(clkid, p);
	else
		now = cpu_clock_sample_group(clkid, p, true);

	bump_cpu_timer(timer, now);

	/*
	 * Now re-arm for the new expiry time.
	 */
	arm_timer(timer, p);
	unlock_task_sighand(p, &flags);
out:
	rcu_read_unlock();
}

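/**
 * task_cputimers_expired - Check whether posix CPU timers are expired
 *
 * @samples:	Array of current samples for the CPUCLOCK clocks
 * @pct:	Pointer to a posix_cputimers container
 *
 * Returns true if any member of @samples is greater than or equal to the
 * corresponding member of @pct->bases[CLK].nextevt. False otherwise.
 */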
static inline bool
task_cputimers_expired(const u64 *samples, struct posix_cputimers *pct)
{
	int i;

	for (i = 0; i < CPUCLOCK_MAX; i++) {
		if (samples[i] >= pct->bases[i].nextevt)
			return true;
	}
	return false;
}

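/**
 * fastpath_timer_check - POSIX CPU timers fast path check
 *
 * @tsk:	The task (thread) being checked.
 *
 * Check the task and thread group timers. If both are inactive (there are
 * no timers set) return false. Otherwise snapshot the task and thread group
 * timers and compare them with the corresponding expiration times. Return
 * true if a timer has expired, else return false.
 */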
static inline bool fastpath_timer_check(struct task_struct *tsk)
{
	struct posix_cputimers *pct = &tsk->posix_cputimers;
	struct signal_struct *sig;

	if (!expiry_cache_is_inactive(pct)) {
		u64 samples[CPUCLOCK_MAX];

		task_sample_cputime(tsk, samples);
		if (task_cputimers_expired(samples, pct))
			return true;
	}

	sig = tsk->signal;
	pct = &sig->posix_cputimers;
	/*
	 * Check if thread group timers expired when timers are active and
	 * no other thread in the group is already handling expiry for
	 * thread group cputimers. These fields are read without the
	 * sighand lock. However, this is fine because this is meant to be
	 * a fastpath heuristic to determine whether we should try to
	 * acquire the sighand lock to handle timer expiry.
	 *
	 * In the worst case scenario, if concurrently timers_active is set
	 * or expiry_active is cleared, but the current thread doesn't see
	 * the change yet, the timer checks are delayed until the next
	 * thread in the group gets a scheduler interrupt to handle the
	 * timer. This isn't an issue in practice because these types of
	 * delays with signals actually getting sent are expected.
	 */
	if (READ_ONCE(pct->timers_active) && !READ_ONCE(pct->expiry_active)) {
		u64 samples[CPUCLOCK_MAX];

		proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic,
					   samples);

		if (task_cputimers_expired(samples, pct))
			return true;
	}

	if (dl_task(tsk) && tsk->dl.dl_overrun)
		return true;

	return false;
}

static void handle_posix_cpu_timers(struct task_struct *tsk);

#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
static void posix_cpu_timers_work(struct callback_head *work)
{
	handle_posix_cpu_timers(current);
}

/*
 * Clear existing posix CPU timers task work.
 */
void clear_posix_cputimers_work(struct task_struct *p)
{
	/*
	 * A copied work entry from the old task is not meaningful, clear it.
	 * N.B. init_task_work will not do this.
	 */
	memset(&p->posix_cputimers_work.work, 0,
	       sizeof(p->posix_cputimers_work.work));
	init_task_work(&p->posix_cputimers_work.work,
		       posix_cpu_timers_work);
	p->posix_cputimers_work.scheduled = false;
}

/*
 * Initialize posix CPU timers task work in the init task.
 */
void __init posix_cputimers_init_work(void)
{
	clear_posix_cputimers_work(current);
}

/*
 * Note: All operations on tsk->posix_cputimers_work.scheduled happen either
 * in hard interrupt context or in task context with interrupts
 * disabled. Aside of that the writer/reader interaction is always in the
 * context of the current task, which means they are strict per CPU.
 */
static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk)
{
	return tsk->posix_cputimers_work.scheduled;
}

static inline void __run_posix_cpu_timers(struct task_struct *tsk)
{
	if (WARN_ON_ONCE(tsk->posix_cputimers_work.scheduled))
		return;

	/* Schedule task work to actually expire the timers */
	tsk->posix_cputimers_work.scheduled = true;
	task_work_add(tsk, &tsk->posix_cputimers_work.work, TWA_RESUME);
}

static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk,
						unsigned long start)
{
	bool ret = true;

	/*
	 * On !RT kernels interrupts are disabled while collecting expired
	 * timers, so no tick can happen and the fast path check can be
	 * reenabled without further checks.
	 */
	if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
		tsk->posix_cputimers_work.scheduled = false;
		return true;
	}

	/*
	 * On RT enabled kernels ticks can happen while the expired timers
	 * are collected under sighand lock. But any tick which observes
	 * the scheduled flag set does not run the fastpath checks. So
	 * reenabling the tick work has to be done carefully:
	 *
	 * Disable interrupts and run the fast path check if jiffies have
	 * advanced since the collecting of expired timers started. If
	 * jiffies have not advanced or the fast path check did not find
	 * newly expired timers, reenable the fast path check in the timer
	 * interrupt. If there are newly expired timers, return false and
	 * let the collection loop repeat.
	 */
	local_irq_disable();
	if (start != jiffies && fastpath_timer_check(tsk))
		ret = false;
	else
		tsk->posix_cputimers_work.scheduled = false;
	local_irq_enable();

	return ret;
}
#else /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */
static inline void __run_posix_cpu_timers(struct task_struct *tsk)
{
	lockdep_posixtimer_enter();
	handle_posix_cpu_timers(tsk);
	lockdep_posixtimer_exit();
}

static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk)
{
	return false;
}

static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk,
						unsigned long start)
{
	return true;
}
#endif /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */

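/*
 * Collect and expire all CPU timers which have fired for the current task.
 * Runs either directly from the tick (with interrupts disabled) or from
 * task work context, depending on CONFIG_POSIX_CPU_TIMERS_TASK_WORK.
 */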
static void handle_posix_cpu_timers(struct task_struct *tsk)
{
	struct k_itimer *timer, *next;
	unsigned long flags, start;
	LIST_HEAD(firing);

	if (!lock_task_sighand(tsk, &flags))
		return;

	do {
		/*
		 * On RT, locking sighand lock does not disable interrupts,
		 * so this needs to be careful vs. ticks. Store the current
		 * jiffies value.
		 */
		start = READ_ONCE(jiffies);
		barrier();

		/*
		 * Collect all per-thread and per-process timers which have
		 * fired and put them on the local firing list.
		 */
		check_thread_timers(tsk, &firing);

		check_process_timers(tsk, &firing);

		/*
		 * On !RT kernels this loop runs exactly once. On RT kernels
		 * ticks can happen while the expired timers are collected,
		 * so posix_cpu_timers_enable_work() rechecks the fast path
		 * and requests another collection round if new timers have
		 * expired in the meantime.
		 */
	} while (!posix_cpu_timers_enable_work(tsk, start));

	/*
	 * We must release the sighand lock before taking any timer's lock.
	 * There is a potential race with timer deletion here, as the
	 * siglock now protects our private firing list. We have set
	 * the firing flag in each timer, so that a deletion attempt
	 * that gets the timer lock before we do will give it up and
	 * spin until we've taken care of that timer below.
	 */
	unlock_task_sighand(tsk, &flags);

	/*
	 * Now that all the timers on our list have the firing flag,
	 * no one will touch their list entries but us. We'll take
	 * each timer's lock before clearing its firing flag, so no
	 * timer call will interfere.
	 */
	list_for_each_entry_safe(timer, next, &firing, it.cpu.elist) {
		int cpu_firing;

		/*
		 * spin_lock() is sufficient here even independent of the
		 * expiry context. If expiry happens in hard interrupt
		 * context it's obvious. For task work context it's safe
		 * because all other operations on timer::it_lock happen in
		 * task context (syscall or exit).
		 */
		spin_lock(&timer->it_lock);
		list_del_init(&timer->it.cpu.elist);
		cpu_firing = timer->it.cpu.firing;
		timer->it.cpu.firing = 0;

		/*
		 * The firing flag is -1 if we collided with a reset
		 * of the timer, which already reported this
		 * almost-firing as an overrun. So don't generate an event.
		 */
		if (likely(cpu_firing >= 0))
			cpu_timer_fire(timer);
		spin_unlock(&timer->it_lock);
	}
}

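/*
 * This is called from the timer interrupt handler. The irq handler has
 * already updated our counts. We need to check if any timers fire now.
 * Interrupts are disabled.
 */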
void run_posix_cpu_timers(void)
{
	struct task_struct *tsk = current;

	lockdep_assert_irqs_disabled();

	/*
	 * If the actual expiry is deferred to task work context and the
	 * work is already scheduled there is no point to do anything here.
	 */
	if (posix_cpu_timers_work_scheduled(tsk))
		return;

	/*
	 * The fast path checks that there are no expired thread or thread
	 * group timers. If that's so, just return.
	 */
	if (!fastpath_timer_check(tsk))
		return;

	__run_posix_cpu_timers(tsk);
}

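/*
 * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
 * The tsk->sighand->siglock must be held by the caller.
 */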
void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid,
			   u64 *newval, u64 *oldval)
{
	u64 now, *nextevt;

	if (WARN_ON_ONCE(clkid >= CPUCLOCK_SCHED))
		return;

	nextevt = &tsk->signal->posix_cputimers.bases[clkid].nextevt;
	now = cpu_clock_sample_group(clkid, tsk, true);

	if (oldval) {
		/*
		 * We are setting itimer. The *oldval is absolute and we update
		 * it to be relative, the *newval argument is relative and we
		 * update it to be absolute.
		 */
		if (*oldval) {
			if (*oldval <= now) {
				/* Just about to fire. */
				*oldval = TICK_NSEC;
			} else {
				*oldval -= now;
			}
		}

		if (*newval)
			*newval += now;
	}

	/*
	 * Update the expiration cache if this is the earliest timer. The
	 * CPUCLOCK_PROF expiry cache is also used by RLIMIT_CPU.
	 */
	if (*newval < *nextevt)
		*nextevt = *newval;

	tick_dep_set_signal(tsk, TICK_DEP_BIT_POSIX_TIMER);
}

static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
			    const struct timespec64 *rqtp)
{
	struct itimerspec64 it;
	struct k_itimer timer;
	u64 expires;
	int error;

	/*
	 * Set up a temporary timer and then wait for it to go off.
	 */
	memset(&timer, 0, sizeof timer);
	spin_lock_init(&timer.it_lock);
	timer.it_clock = which_clock;
	timer.it_overrun = -1;
	error = posix_cpu_timer_create(&timer);
	timer.it_process = current;

	if (!error) {
		static struct itimerspec64 zero_it;
		struct restart_block *restart;

		memset(&it, 0, sizeof(it));
		it.it_value = *rqtp;

		spin_lock_irq(&timer.it_lock);
		error = posix_cpu_timer_set(&timer, flags, &it, NULL);
		if (error) {
			spin_unlock_irq(&timer.it_lock);
			return error;
		}

		while (!signal_pending(current)) {
			if (!cpu_timer_getexpires(&timer.it.cpu)) {
				/*
				 * Our timer fired and was reset, below
				 * deletion can not fail.
				 */
				posix_cpu_timer_del(&timer);
				spin_unlock_irq(&timer.it_lock);
				return 0;
			}

			/*
			 * Block until cpu_timer_fire (or a signal) wakes us.
			 */
			__set_current_state(TASK_INTERRUPTIBLE);
			spin_unlock_irq(&timer.it_lock);
			schedule();
			spin_lock_irq(&timer.it_lock);
		}

		/*
		 * We were interrupted by a signal.
		 */
		expires = cpu_timer_getexpires(&timer.it.cpu);
		error = posix_cpu_timer_set(&timer, 0, &zero_it, &it);
		if (!error) {
			/*
			 * Timer is now unarmed, deletion can not fail.
			 */
			posix_cpu_timer_del(&timer);
		}
		spin_unlock_irq(&timer.it_lock);

		while (error == TIMER_RETRY) {
			/*
			 * We need to handle the case when the timer was or is
			 * in the middle of firing. In other cases we already
			 * freed the resources.
			 */
			spin_lock_irq(&timer.it_lock);
			error = posix_cpu_timer_del(&timer);
			spin_unlock_irq(&timer.it_lock);
		}

		if ((it.it_value.tv_sec | it.it_value.tv_nsec) == 0) {
			/*
			 * It actually did fire already.
			 */
			return 0;
		}

		error = -ERESTART_RESTARTBLOCK;
		/*
		 * Report back to the user the time still remaining.
		 */
		restart = &current->restart_block;
		restart->nanosleep.expires = expires;
		if (restart->nanosleep.type != TT_NONE)
			error = nanosleep_copyout(restart, &it.it_value);
	}

	return error;
}

1538
1539 static long posix_cpu_nsleep_restart(struct restart_block *restart_block);
1540
1541 static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
1542 const struct timespec64 *rqtp)
1543 {
1544 struct restart_block *restart_block = ¤t->restart_block;
1545 int error;
1546
1547
1548
1549
1550 if (CPUCLOCK_PERTHREAD(which_clock) &&
1551 (CPUCLOCK_PID(which_clock) == 0 ||
1552 CPUCLOCK_PID(which_clock) == task_pid_vnr(current)))
1553 return -EINVAL;
1554
1555 error = do_cpu_nanosleep(which_clock, flags, rqtp);
1556
1557 if (error == -ERESTART_RESTARTBLOCK) {
1558
1559 if (flags & TIMER_ABSTIME)
1560 return -ERESTARTNOHAND;
1561
1562 restart_block->nanosleep.clockid = which_clock;
1563 set_restart_fn(restart_block, posix_cpu_nsleep_restart);
1564 }
1565 return error;
1566 }
1567
1568 static long posix_cpu_nsleep_restart(struct restart_block *restart_block)
1569 {
1570 clockid_t which_clock = restart_block->nanosleep.clockid;
1571 struct timespec64 t;
1572
1573 t = ns_to_timespec64(restart_block->nanosleep.expires);
1574
1575 return do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t);
1576 }
1577
#define PROCESS_CLOCK	make_process_cpuclock(0, CPUCLOCK_SCHED)
#define THREAD_CLOCK	make_thread_cpuclock(0, CPUCLOCK_SCHED)

static int process_cpu_clock_getres(const clockid_t which_clock,
				    struct timespec64 *tp)
{
	return posix_cpu_clock_getres(PROCESS_CLOCK, tp);
}
static int process_cpu_clock_get(const clockid_t which_clock,
				 struct timespec64 *tp)
{
	return posix_cpu_clock_get(PROCESS_CLOCK, tp);
}
static int process_cpu_timer_create(struct k_itimer *timer)
{
	timer->it_clock = PROCESS_CLOCK;
	return posix_cpu_timer_create(timer);
}
static int process_cpu_nsleep(const clockid_t which_clock, int flags,
			      const struct timespec64 *rqtp)
{
	return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp);
}
static int thread_cpu_clock_getres(const clockid_t which_clock,
				   struct timespec64 *tp)
{
	return posix_cpu_clock_getres(THREAD_CLOCK, tp);
}
static int thread_cpu_clock_get(const clockid_t which_clock,
				struct timespec64 *tp)
{
	return posix_cpu_clock_get(THREAD_CLOCK, tp);
}
static int thread_cpu_timer_create(struct k_itimer *timer)
{
	timer->it_clock = THREAD_CLOCK;
	return posix_cpu_timer_create(timer);
}

const struct k_clock clock_posix_cpu = {
	.clock_getres		= posix_cpu_clock_getres,
	.clock_set		= posix_cpu_clock_set,
	.clock_get_timespec	= posix_cpu_clock_get,
	.timer_create		= posix_cpu_timer_create,
	.nsleep			= posix_cpu_nsleep,
	.timer_set		= posix_cpu_timer_set,
	.timer_del		= posix_cpu_timer_del,
	.timer_get		= posix_cpu_timer_get,
	.timer_rearm		= posix_cpu_timer_rearm,
};

const struct k_clock clock_process = {
	.clock_getres		= process_cpu_clock_getres,
	.clock_get_timespec	= process_cpu_clock_get,
	.timer_create		= process_cpu_timer_create,
	.nsleep			= process_cpu_nsleep,
};

const struct k_clock clock_thread = {
	.clock_getres		= thread_cpu_clock_getres,
	.clock_get_timespec	= thread_cpu_clock_get,
	.timer_create		= thread_cpu_timer_create,
};