Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  *  fs/timerfd.c
0004  *
0005  *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
0006  *
0007  *
0008  *  Thanks to Thomas Gleixner for code reviews and useful comments.
0009  *
0010  */
0011 
0012 #include <linux/alarmtimer.h>
0013 #include <linux/file.h>
0014 #include <linux/poll.h>
0015 #include <linux/init.h>
0016 #include <linux/fs.h>
0017 #include <linux/sched.h>
0018 #include <linux/kernel.h>
0019 #include <linux/slab.h>
0020 #include <linux/list.h>
0021 #include <linux/spinlock.h>
0022 #include <linux/time.h>
0023 #include <linux/hrtimer.h>
0024 #include <linux/anon_inodes.h>
0025 #include <linux/timerfd.h>
0026 #include <linux/syscalls.h>
0027 #include <linux/compat.h>
0028 #include <linux/rcupdate.h>
0029 #include <linux/time_namespace.h>
0030 
0031 struct timerfd_ctx {
0032     union {
0033         struct hrtimer tmr;
0034         struct alarm alarm;
0035     } t;
0036     ktime_t tintv;
0037     ktime_t moffs;
0038     wait_queue_head_t wqh;
0039     u64 ticks;
0040     int clockid;
0041     short unsigned expired;
0042     short unsigned settime_flags;   /* to show in fdinfo */
0043     struct rcu_head rcu;
0044     struct list_head clist;
0045     spinlock_t cancel_lock;
0046     bool might_cancel;
0047 };
0048 
/*
 * Contexts that asked to be cancelled when the clock is set are linked on
 * cancel_list. Additions and removals are done with cancel_lock held and
 * use the RCU list primitives; timerfd_clock_was_set() walks the list
 * under rcu_read_lock().
 */
static DEFINE_SPINLOCK(cancel_lock);
static LIST_HEAD(cancel_list);
0051 
0052 static inline bool isalarm(struct timerfd_ctx *ctx)
0053 {
0054     return ctx->clockid == CLOCK_REALTIME_ALARM ||
0055         ctx->clockid == CLOCK_BOOTTIME_ALARM;
0056 }
0057 
0058 /*
0059  * This gets called when the timer event triggers. We set the "expired"
0060  * flag, but we do not re-arm the timer (in case it's necessary,
0061  * tintv != 0) until the timer is accessed.
0062  */
0063 static void timerfd_triggered(struct timerfd_ctx *ctx)
0064 {
0065     unsigned long flags;
0066 
0067     spin_lock_irqsave(&ctx->wqh.lock, flags);
0068     ctx->expired = 1;
0069     ctx->ticks++;
0070     wake_up_locked_poll(&ctx->wqh, EPOLLIN);
0071     spin_unlock_irqrestore(&ctx->wqh.lock, flags);
0072 }
0073 
0074 static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
0075 {
0076     struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx,
0077                            t.tmr);
0078     timerfd_triggered(ctx);
0079     return HRTIMER_NORESTART;
0080 }
0081 
0082 static enum alarmtimer_restart timerfd_alarmproc(struct alarm *alarm,
0083     ktime_t now)
0084 {
0085     struct timerfd_ctx *ctx = container_of(alarm, struct timerfd_ctx,
0086                            t.alarm);
0087     timerfd_triggered(ctx);
0088     return ALARMTIMER_NORESTART;
0089 }
0090 
0091 /*
0092  * Called when the clock was set to cancel the timers in the cancel
0093  * list. This will wake up processes waiting on these timers. The
0094  * wake-up requires ctx->ticks to be non zero, therefore we increment
0095  * it before calling wake_up_locked().
0096  */
0097 void timerfd_clock_was_set(void)
0098 {
0099     ktime_t moffs = ktime_mono_to_real(0);
0100     struct timerfd_ctx *ctx;
0101     unsigned long flags;
0102 
0103     rcu_read_lock();
0104     list_for_each_entry_rcu(ctx, &cancel_list, clist) {
0105         if (!ctx->might_cancel)
0106             continue;
0107         spin_lock_irqsave(&ctx->wqh.lock, flags);
0108         if (ctx->moffs != moffs) {
0109             ctx->moffs = KTIME_MAX;
0110             ctx->ticks++;
0111             wake_up_locked_poll(&ctx->wqh, EPOLLIN);
0112         }
0113         spin_unlock_irqrestore(&ctx->wqh.lock, flags);
0114     }
0115     rcu_read_unlock();
0116 }
0117 
/*
 * Work item handler queued by timerfd_resume(); it simply runs the
 * clock-was-set processing. @work is not used.
 */
static void timerfd_resume_work(struct work_struct *work)
{
    timerfd_clock_was_set();
}

/* The work item that timerfd_resume() schedules. */
static DECLARE_WORK(timerfd_work, timerfd_resume_work);
0124 
0125 /*
0126  * Invoked from timekeeping_resume(). Defer the actual update to work so
0127  * timerfd_clock_was_set() runs in task context.
0128  */
0129 void timerfd_resume(void)
0130 {
0131     schedule_work(&timerfd_work);
0132 }
0133 
0134 static void __timerfd_remove_cancel(struct timerfd_ctx *ctx)
0135 {
0136     if (ctx->might_cancel) {
0137         ctx->might_cancel = false;
0138         spin_lock(&cancel_lock);
0139         list_del_rcu(&ctx->clist);
0140         spin_unlock(&cancel_lock);
0141     }
0142 }
0143 
0144 static void timerfd_remove_cancel(struct timerfd_ctx *ctx)
0145 {
0146     spin_lock(&ctx->cancel_lock);
0147     __timerfd_remove_cancel(ctx);
0148     spin_unlock(&ctx->cancel_lock);
0149 }
0150 
0151 static bool timerfd_canceled(struct timerfd_ctx *ctx)
0152 {
0153     if (!ctx->might_cancel || ctx->moffs != KTIME_MAX)
0154         return false;
0155     ctx->moffs = ktime_mono_to_real(0);
0156     return true;
0157 }
0158 
0159 static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
0160 {
0161     spin_lock(&ctx->cancel_lock);
0162     if ((ctx->clockid == CLOCK_REALTIME ||
0163          ctx->clockid == CLOCK_REALTIME_ALARM) &&
0164         (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) {
0165         if (!ctx->might_cancel) {
0166             ctx->might_cancel = true;
0167             spin_lock(&cancel_lock);
0168             list_add_rcu(&ctx->clist, &cancel_list);
0169             spin_unlock(&cancel_lock);
0170         }
0171     } else {
0172         __timerfd_remove_cancel(ctx);
0173     }
0174     spin_unlock(&ctx->cancel_lock);
0175 }
0176 
0177 static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
0178 {
0179     ktime_t remaining;
0180 
0181     if (isalarm(ctx))
0182         remaining = alarm_expires_remaining(&ctx->t.alarm);
0183     else
0184         remaining = hrtimer_expires_remaining_adjusted(&ctx->t.tmr);
0185 
0186     return remaining < 0 ? 0: remaining;
0187 }
0188 
0189 static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
0190              const struct itimerspec64 *ktmr)
0191 {
0192     enum hrtimer_mode htmode;
0193     ktime_t texp;
0194     int clockid = ctx->clockid;
0195 
0196     htmode = (flags & TFD_TIMER_ABSTIME) ?
0197         HRTIMER_MODE_ABS: HRTIMER_MODE_REL;
0198 
0199     texp = timespec64_to_ktime(ktmr->it_value);
0200     ctx->expired = 0;
0201     ctx->ticks = 0;
0202     ctx->tintv = timespec64_to_ktime(ktmr->it_interval);
0203 
0204     if (isalarm(ctx)) {
0205         alarm_init(&ctx->t.alarm,
0206                ctx->clockid == CLOCK_REALTIME_ALARM ?
0207                ALARM_REALTIME : ALARM_BOOTTIME,
0208                timerfd_alarmproc);
0209     } else {
0210         hrtimer_init(&ctx->t.tmr, clockid, htmode);
0211         hrtimer_set_expires(&ctx->t.tmr, texp);
0212         ctx->t.tmr.function = timerfd_tmrproc;
0213     }
0214 
0215     if (texp != 0) {
0216         if (flags & TFD_TIMER_ABSTIME)
0217             texp = timens_ktime_to_host(clockid, texp);
0218         if (isalarm(ctx)) {
0219             if (flags & TFD_TIMER_ABSTIME)
0220                 alarm_start(&ctx->t.alarm, texp);
0221             else
0222                 alarm_start_relative(&ctx->t.alarm, texp);
0223         } else {
0224             hrtimer_start(&ctx->t.tmr, texp, htmode);
0225         }
0226 
0227         if (timerfd_canceled(ctx))
0228             return -ECANCELED;
0229     }
0230 
0231     ctx->settime_flags = flags & TFD_SETTIME_FLAGS;
0232     return 0;
0233 }
0234 
0235 static int timerfd_release(struct inode *inode, struct file *file)
0236 {
0237     struct timerfd_ctx *ctx = file->private_data;
0238 
0239     timerfd_remove_cancel(ctx);
0240 
0241     if (isalarm(ctx))
0242         alarm_cancel(&ctx->t.alarm);
0243     else
0244         hrtimer_cancel(&ctx->t.tmr);
0245     kfree_rcu(ctx, rcu);
0246     return 0;
0247 }
0248 
0249 static __poll_t timerfd_poll(struct file *file, poll_table *wait)
0250 {
0251     struct timerfd_ctx *ctx = file->private_data;
0252     __poll_t events = 0;
0253     unsigned long flags;
0254 
0255     poll_wait(file, &ctx->wqh, wait);
0256 
0257     spin_lock_irqsave(&ctx->wqh.lock, flags);
0258     if (ctx->ticks)
0259         events |= EPOLLIN;
0260     spin_unlock_irqrestore(&ctx->wqh.lock, flags);
0261 
0262     return events;
0263 }
0264 
0265 static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
0266                 loff_t *ppos)
0267 {
0268     struct timerfd_ctx *ctx = file->private_data;
0269     ssize_t res;
0270     u64 ticks = 0;
0271 
0272     if (count < sizeof(ticks))
0273         return -EINVAL;
0274     spin_lock_irq(&ctx->wqh.lock);
0275     if (file->f_flags & O_NONBLOCK)
0276         res = -EAGAIN;
0277     else
0278         res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);
0279 
0280     /*
0281      * If clock has changed, we do not care about the
0282      * ticks and we do not rearm the timer. Userspace must
0283      * reevaluate anyway.
0284      */
0285     if (timerfd_canceled(ctx)) {
0286         ctx->ticks = 0;
0287         ctx->expired = 0;
0288         res = -ECANCELED;
0289     }
0290 
0291     if (ctx->ticks) {
0292         ticks = ctx->ticks;
0293 
0294         if (ctx->expired && ctx->tintv) {
0295             /*
0296              * If tintv != 0, this is a periodic timer that
0297              * needs to be re-armed. We avoid doing it in the timer
0298              * callback to avoid DoS attacks specifying a very
0299              * short timer period.
0300              */
0301             if (isalarm(ctx)) {
0302                 ticks += alarm_forward_now(
0303                     &ctx->t.alarm, ctx->tintv) - 1;
0304                 alarm_restart(&ctx->t.alarm);
0305             } else {
0306                 ticks += hrtimer_forward_now(&ctx->t.tmr,
0307                                  ctx->tintv) - 1;
0308                 hrtimer_restart(&ctx->t.tmr);
0309             }
0310         }
0311         ctx->expired = 0;
0312         ctx->ticks = 0;
0313     }
0314     spin_unlock_irq(&ctx->wqh.lock);
0315     if (ticks)
0316         res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks);
0317     return res;
0318 }
0319 
#ifdef CONFIG_PROC_FS
/*
 * ->show_fdinfo handler: print clock id, pending ticks, settime flags,
 * remaining time and interval of the timerfd into @m.
 *
 * ticks and settime_flags are modified under ctx->wqh.lock, so they are
 * sampled together with the remaining time and interval while the lock is
 * held. This keeps the printed fields consistent with each other and
 * avoids reading the 64-bit tick count while it is being updated.
 * clockid is assigned once in timerfd_create() and is read directly.
 */
static void timerfd_show(struct seq_file *m, struct file *file)
{
    struct timerfd_ctx *ctx = file->private_data;
    struct timespec64 value, interval;
    unsigned int settime_flags;
    u64 ticks;

    spin_lock_irq(&ctx->wqh.lock);
    value = ktime_to_timespec64(timerfd_get_remaining(ctx));
    interval = ktime_to_timespec64(ctx->tintv);
    ticks = ctx->ticks;
    settime_flags = ctx->settime_flags;
    spin_unlock_irq(&ctx->wqh.lock);

    seq_printf(m,
           "clockid: %d\n"
           "ticks: %llu\n"
           "settime flags: 0%o\n"
           "it_value: (%llu, %llu)\n"
           "it_interval: (%llu, %llu)\n",
           ctx->clockid,
           (unsigned long long)ticks,
           settime_flags,
           (unsigned long long)value.tv_sec,
           (unsigned long long)value.tv_nsec,
           (unsigned long long)interval.tv_sec,
           (unsigned long long)interval.tv_nsec);
}
#else
#define timerfd_show NULL
#endif
0348 
#ifdef CONFIG_CHECKPOINT_RESTORE
/*
 * ->unlocked_ioctl handler. The only command handled is
 * TFD_IOC_SET_TICKS, which overwrites the pending tick count with the
 * non-zero u64 read from @arg and wakes up waiters.
 *
 * Returns 0 on success, -ENOTTY for any other command, -EFAULT when the
 * value cannot be copied from user space, -EINVAL when it is zero and
 * -ECANCELED when the timer was cancelled by a clock change.
 */
static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
    struct timerfd_ctx *ctx = file->private_data;
    int ret = 0;
    u64 ticks;

    if (cmd != TFD_IOC_SET_TICKS)
        return -ENOTTY;

    if (copy_from_user(&ticks, (u64 __user *)arg, sizeof(ticks)))
        return -EFAULT;
    if (!ticks)
        return -EINVAL;

    spin_lock_irq(&ctx->wqh.lock);
    if (timerfd_canceled(ctx)) {
        ret = -ECANCELED;
    } else {
        ctx->ticks = ticks;
        wake_up_locked_poll(&ctx->wqh, EPOLLIN);
    }
    spin_unlock_irq(&ctx->wqh.lock);

    return ret;
}
#else
#define timerfd_ioctl NULL
#endif
0383 
0384 static const struct file_operations timerfd_fops = {
0385     .release    = timerfd_release,
0386     .poll       = timerfd_poll,
0387     .read       = timerfd_read,
0388     .llseek     = noop_llseek,
0389     .show_fdinfo    = timerfd_show,
0390     .unlocked_ioctl = timerfd_ioctl,
0391 };
0392 
0393 static int timerfd_fget(int fd, struct fd *p)
0394 {
0395     struct fd f = fdget(fd);
0396     if (!f.file)
0397         return -EBADF;
0398     if (f.file->f_op != &timerfd_fops) {
0399         fdput(f);
0400         return -EINVAL;
0401     }
0402     *p = f;
0403     return 0;
0404 }
0405 
0406 SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
0407 {
0408     int ufd;
0409     struct timerfd_ctx *ctx;
0410 
0411     /* Check the TFD_* constants for consistency.  */
0412     BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
0413     BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK);
0414 
0415     if ((flags & ~TFD_CREATE_FLAGS) ||
0416         (clockid != CLOCK_MONOTONIC &&
0417          clockid != CLOCK_REALTIME &&
0418          clockid != CLOCK_REALTIME_ALARM &&
0419          clockid != CLOCK_BOOTTIME &&
0420          clockid != CLOCK_BOOTTIME_ALARM))
0421         return -EINVAL;
0422 
0423     if ((clockid == CLOCK_REALTIME_ALARM ||
0424          clockid == CLOCK_BOOTTIME_ALARM) &&
0425         !capable(CAP_WAKE_ALARM))
0426         return -EPERM;
0427 
0428     ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
0429     if (!ctx)
0430         return -ENOMEM;
0431 
0432     init_waitqueue_head(&ctx->wqh);
0433     spin_lock_init(&ctx->cancel_lock);
0434     ctx->clockid = clockid;
0435 
0436     if (isalarm(ctx))
0437         alarm_init(&ctx->t.alarm,
0438                ctx->clockid == CLOCK_REALTIME_ALARM ?
0439                ALARM_REALTIME : ALARM_BOOTTIME,
0440                timerfd_alarmproc);
0441     else
0442         hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS);
0443 
0444     ctx->moffs = ktime_mono_to_real(0);
0445 
0446     ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
0447                    O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
0448     if (ufd < 0)
0449         kfree(ctx);
0450 
0451     return ufd;
0452 }
0453 
0454 static int do_timerfd_settime(int ufd, int flags, 
0455         const struct itimerspec64 *new,
0456         struct itimerspec64 *old)
0457 {
0458     struct fd f;
0459     struct timerfd_ctx *ctx;
0460     int ret;
0461 
0462     if ((flags & ~TFD_SETTIME_FLAGS) ||
0463          !itimerspec64_valid(new))
0464         return -EINVAL;
0465 
0466     ret = timerfd_fget(ufd, &f);
0467     if (ret)
0468         return ret;
0469     ctx = f.file->private_data;
0470 
0471     if (isalarm(ctx) && !capable(CAP_WAKE_ALARM)) {
0472         fdput(f);
0473         return -EPERM;
0474     }
0475 
0476     timerfd_setup_cancel(ctx, flags);
0477 
0478     /*
0479      * We need to stop the existing timer before reprogramming
0480      * it to the new values.
0481      */
0482     for (;;) {
0483         spin_lock_irq(&ctx->wqh.lock);
0484 
0485         if (isalarm(ctx)) {
0486             if (alarm_try_to_cancel(&ctx->t.alarm) >= 0)
0487                 break;
0488         } else {
0489             if (hrtimer_try_to_cancel(&ctx->t.tmr) >= 0)
0490                 break;
0491         }
0492         spin_unlock_irq(&ctx->wqh.lock);
0493 
0494         if (isalarm(ctx))
0495             hrtimer_cancel_wait_running(&ctx->t.alarm.timer);
0496         else
0497             hrtimer_cancel_wait_running(&ctx->t.tmr);
0498     }
0499 
0500     /*
0501      * If the timer is expired and it's periodic, we need to advance it
0502      * because the caller may want to know the previous expiration time.
0503      * We do not update "ticks" and "expired" since the timer will be
0504      * re-programmed again in the following timerfd_setup() call.
0505      */
0506     if (ctx->expired && ctx->tintv) {
0507         if (isalarm(ctx))
0508             alarm_forward_now(&ctx->t.alarm, ctx->tintv);
0509         else
0510             hrtimer_forward_now(&ctx->t.tmr, ctx->tintv);
0511     }
0512 
0513     old->it_value = ktime_to_timespec64(timerfd_get_remaining(ctx));
0514     old->it_interval = ktime_to_timespec64(ctx->tintv);
0515 
0516     /*
0517      * Re-program the timer to the new value ...
0518      */
0519     ret = timerfd_setup(ctx, flags, new);
0520 
0521     spin_unlock_irq(&ctx->wqh.lock);
0522     fdput(f);
0523     return ret;
0524 }
0525 
0526 static int do_timerfd_gettime(int ufd, struct itimerspec64 *t)
0527 {
0528     struct fd f;
0529     struct timerfd_ctx *ctx;
0530     int ret = timerfd_fget(ufd, &f);
0531     if (ret)
0532         return ret;
0533     ctx = f.file->private_data;
0534 
0535     spin_lock_irq(&ctx->wqh.lock);
0536     if (ctx->expired && ctx->tintv) {
0537         ctx->expired = 0;
0538 
0539         if (isalarm(ctx)) {
0540             ctx->ticks +=
0541                 alarm_forward_now(
0542                     &ctx->t.alarm, ctx->tintv) - 1;
0543             alarm_restart(&ctx->t.alarm);
0544         } else {
0545             ctx->ticks +=
0546                 hrtimer_forward_now(&ctx->t.tmr, ctx->tintv)
0547                 - 1;
0548             hrtimer_restart(&ctx->t.tmr);
0549         }
0550     }
0551     t->it_value = ktime_to_timespec64(timerfd_get_remaining(ctx));
0552     t->it_interval = ktime_to_timespec64(ctx->tintv);
0553     spin_unlock_irq(&ctx->wqh.lock);
0554     fdput(f);
0555     return 0;
0556 }
0557 
0558 SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
0559         const struct __kernel_itimerspec __user *, utmr,
0560         struct __kernel_itimerspec __user *, otmr)
0561 {
0562     struct itimerspec64 new, old;
0563     int ret;
0564 
0565     if (get_itimerspec64(&new, utmr))
0566         return -EFAULT;
0567     ret = do_timerfd_settime(ufd, flags, &new, &old);
0568     if (ret)
0569         return ret;
0570     if (otmr && put_itimerspec64(&old, otmr))
0571         return -EFAULT;
0572 
0573     return ret;
0574 }
0575 
0576 SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct __kernel_itimerspec __user *, otmr)
0577 {
0578     struct itimerspec64 kotmr;
0579     int ret = do_timerfd_gettime(ufd, &kotmr);
0580     if (ret)
0581         return ret;
0582     return put_itimerspec64(&kotmr, otmr) ? -EFAULT : 0;
0583 }
0584 
0585 #ifdef CONFIG_COMPAT_32BIT_TIME
0586 SYSCALL_DEFINE4(timerfd_settime32, int, ufd, int, flags,
0587         const struct old_itimerspec32 __user *, utmr,
0588         struct old_itimerspec32 __user *, otmr)
0589 {
0590     struct itimerspec64 new, old;
0591     int ret;
0592 
0593     if (get_old_itimerspec32(&new, utmr))
0594         return -EFAULT;
0595     ret = do_timerfd_settime(ufd, flags, &new, &old);
0596     if (ret)
0597         return ret;
0598     if (otmr && put_old_itimerspec32(&old, otmr))
0599         return -EFAULT;
0600     return ret;
0601 }
0602 
0603 SYSCALL_DEFINE2(timerfd_gettime32, int, ufd,
0604         struct old_itimerspec32 __user *, otmr)
0605 {
0606     struct itimerspec64 kotmr;
0607     int ret = do_timerfd_gettime(ufd, &kotmr);
0608     if (ret)
0609         return ret;
0610     return put_old_itimerspec32(&kotmr, otmr) ? -EFAULT : 0;
0611 }
0612 #endif