Back to home page

LXR

 
 

    


0001 /*
0002  *  fs/timerfd.c
0003  *
0004  *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
0005  *
0006  *
0007  *  Thanks to Thomas Gleixner for code reviews and useful comments.
0008  *
0009  */
0010 
0011 #include <linux/alarmtimer.h>
0012 #include <linux/file.h>
0013 #include <linux/poll.h>
0014 #include <linux/init.h>
0015 #include <linux/fs.h>
0016 #include <linux/sched.h>
0017 #include <linux/kernel.h>
0018 #include <linux/slab.h>
0019 #include <linux/list.h>
0020 #include <linux/spinlock.h>
0021 #include <linux/time.h>
0022 #include <linux/hrtimer.h>
0023 #include <linux/anon_inodes.h>
0024 #include <linux/timerfd.h>
0025 #include <linux/syscalls.h>
0026 #include <linux/compat.h>
0027 #include <linux/rcupdate.h>
0028 
/*
 * Per-descriptor state of a timerfd. One instance is allocated in
 * timerfd_create() and stored in file->private_data.
 */
struct timerfd_ctx {
    /* Backing timer; isalarm() decides which member is in use. */
    union {
        struct hrtimer tmr;
        struct alarm alarm;
    } t;
    /* Re-arm interval; a non-zero value makes the timer periodic. */
    ktime_t tintv;
    /*
     * Realtime offset sample (ktime_mono_to_real(0)); set to KTIME_MAX
     * by timerfd_clock_was_set() to mark the timer as cancelled.
     */
    ktime_t moffs;
    /* Wait queue for readers/pollers; wqh.lock also guards ticks/expired. */
    wait_queue_head_t wqh;
    /* Expirations accumulated since the counter was last reset. */
    u64 ticks;
    /* Clock id passed to timerfd_create(). */
    int clockid;
    /* Set by the timer callback; cleared once the expiry is consumed. */
    short unsigned expired;
    short unsigned settime_flags;   /* to show in fdinfo */
    /* Used by kfree_rcu() when the file is released. */
    struct rcu_head rcu;
    /* Linkage into the global cancel_list. */
    struct list_head clist;
    /* True while the context is linked into cancel_list. */
    bool might_cancel;
};
0045 
/*
 * Contexts that asked to be cancelled when the clock is set. The list is
 * walked under RCU by timerfd_clock_was_set(); additions and removals are
 * serialized by cancel_lock.
 */
static LIST_HEAD(cancel_list);
static DEFINE_SPINLOCK(cancel_lock);
0048 
0049 static inline bool isalarm(struct timerfd_ctx *ctx)
0050 {
0051     return ctx->clockid == CLOCK_REALTIME_ALARM ||
0052         ctx->clockid == CLOCK_BOOTTIME_ALARM;
0053 }
0054 
0055 /*
0056  * This gets called when the timer event triggers. We set the "expired"
0057  * flag, but we do not re-arm the timer (in case it's necessary,
0058  * tintv != 0) until the timer is accessed.
0059  */
0060 static void timerfd_triggered(struct timerfd_ctx *ctx)
0061 {
0062     unsigned long flags;
0063 
0064     spin_lock_irqsave(&ctx->wqh.lock, flags);
0065     ctx->expired = 1;
0066     ctx->ticks++;
0067     wake_up_locked(&ctx->wqh);
0068     spin_unlock_irqrestore(&ctx->wqh.lock, flags);
0069 }
0070 
0071 static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
0072 {
0073     struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx,
0074                            t.tmr);
0075     timerfd_triggered(ctx);
0076     return HRTIMER_NORESTART;
0077 }
0078 
0079 static enum alarmtimer_restart timerfd_alarmproc(struct alarm *alarm,
0080     ktime_t now)
0081 {
0082     struct timerfd_ctx *ctx = container_of(alarm, struct timerfd_ctx,
0083                            t.alarm);
0084     timerfd_triggered(ctx);
0085     return ALARMTIMER_NORESTART;
0086 }
0087 
0088 /*
0089  * Called when the clock was set to cancel the timers in the cancel
0090  * list. This will wake up processes waiting on these timers. The
0091  * wake-up requires ctx->ticks to be non zero, therefore we increment
0092  * it before calling wake_up_locked().
0093  */
0094 void timerfd_clock_was_set(void)
0095 {
0096     ktime_t moffs = ktime_mono_to_real(0);
0097     struct timerfd_ctx *ctx;
0098     unsigned long flags;
0099 
0100     rcu_read_lock();
0101     list_for_each_entry_rcu(ctx, &cancel_list, clist) {
0102         if (!ctx->might_cancel)
0103             continue;
0104         spin_lock_irqsave(&ctx->wqh.lock, flags);
0105         if (ctx->moffs != moffs) {
0106             ctx->moffs = KTIME_MAX;
0107             ctx->ticks++;
0108             wake_up_locked(&ctx->wqh);
0109         }
0110         spin_unlock_irqrestore(&ctx->wqh.lock, flags);
0111     }
0112     rcu_read_unlock();
0113 }
0114 
0115 static void timerfd_remove_cancel(struct timerfd_ctx *ctx)
0116 {
0117     if (ctx->might_cancel) {
0118         ctx->might_cancel = false;
0119         spin_lock(&cancel_lock);
0120         list_del_rcu(&ctx->clist);
0121         spin_unlock(&cancel_lock);
0122     }
0123 }
0124 
0125 static bool timerfd_canceled(struct timerfd_ctx *ctx)
0126 {
0127     if (!ctx->might_cancel || ctx->moffs != KTIME_MAX)
0128         return false;
0129     ctx->moffs = ktime_mono_to_real(0);
0130     return true;
0131 }
0132 
0133 static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
0134 {
0135     if ((ctx->clockid == CLOCK_REALTIME ||
0136          ctx->clockid == CLOCK_REALTIME_ALARM) &&
0137         (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) {
0138         if (!ctx->might_cancel) {
0139             ctx->might_cancel = true;
0140             spin_lock(&cancel_lock);
0141             list_add_rcu(&ctx->clist, &cancel_list);
0142             spin_unlock(&cancel_lock);
0143         }
0144     } else if (ctx->might_cancel) {
0145         timerfd_remove_cancel(ctx);
0146     }
0147 }
0148 
0149 static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
0150 {
0151     ktime_t remaining;
0152 
0153     if (isalarm(ctx))
0154         remaining = alarm_expires_remaining(&ctx->t.alarm);
0155     else
0156         remaining = hrtimer_expires_remaining_adjusted(&ctx->t.tmr);
0157 
0158     return remaining < 0 ? 0: remaining;
0159 }
0160 
0161 static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
0162              const struct itimerspec *ktmr)
0163 {
0164     enum hrtimer_mode htmode;
0165     ktime_t texp;
0166     int clockid = ctx->clockid;
0167 
0168     htmode = (flags & TFD_TIMER_ABSTIME) ?
0169         HRTIMER_MODE_ABS: HRTIMER_MODE_REL;
0170 
0171     texp = timespec_to_ktime(ktmr->it_value);
0172     ctx->expired = 0;
0173     ctx->ticks = 0;
0174     ctx->tintv = timespec_to_ktime(ktmr->it_interval);
0175 
0176     if (isalarm(ctx)) {
0177         alarm_init(&ctx->t.alarm,
0178                ctx->clockid == CLOCK_REALTIME_ALARM ?
0179                ALARM_REALTIME : ALARM_BOOTTIME,
0180                timerfd_alarmproc);
0181     } else {
0182         hrtimer_init(&ctx->t.tmr, clockid, htmode);
0183         hrtimer_set_expires(&ctx->t.tmr, texp);
0184         ctx->t.tmr.function = timerfd_tmrproc;
0185     }
0186 
0187     if (texp != 0) {
0188         if (isalarm(ctx)) {
0189             if (flags & TFD_TIMER_ABSTIME)
0190                 alarm_start(&ctx->t.alarm, texp);
0191             else
0192                 alarm_start_relative(&ctx->t.alarm, texp);
0193         } else {
0194             hrtimer_start(&ctx->t.tmr, texp, htmode);
0195         }
0196 
0197         if (timerfd_canceled(ctx))
0198             return -ECANCELED;
0199     }
0200 
0201     ctx->settime_flags = flags & TFD_SETTIME_FLAGS;
0202     return 0;
0203 }
0204 
0205 static int timerfd_release(struct inode *inode, struct file *file)
0206 {
0207     struct timerfd_ctx *ctx = file->private_data;
0208 
0209     timerfd_remove_cancel(ctx);
0210 
0211     if (isalarm(ctx))
0212         alarm_cancel(&ctx->t.alarm);
0213     else
0214         hrtimer_cancel(&ctx->t.tmr);
0215     kfree_rcu(ctx, rcu);
0216     return 0;
0217 }
0218 
0219 static unsigned int timerfd_poll(struct file *file, poll_table *wait)
0220 {
0221     struct timerfd_ctx *ctx = file->private_data;
0222     unsigned int events = 0;
0223     unsigned long flags;
0224 
0225     poll_wait(file, &ctx->wqh, wait);
0226 
0227     spin_lock_irqsave(&ctx->wqh.lock, flags);
0228     if (ctx->ticks)
0229         events |= POLLIN;
0230     spin_unlock_irqrestore(&ctx->wqh.lock, flags);
0231 
0232     return events;
0233 }
0234 
/*
 * ->read handler: hand the accumulated expiration count to userspace as
 * a u64 and reset it.
 *
 * Returns sizeof(u64) on success, -EINVAL when the buffer is smaller than
 * a u64, -EAGAIN for an O_NONBLOCK file with no pending ticks, -ECANCELED
 * when the timer was cancelled by a clock change, -EFAULT when the copy
 * to userspace fails, or whatever the interruptible wait returned.
 */
static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
                loff_t *ppos)
{
    struct timerfd_ctx *ctx = file->private_data;
    ssize_t res;
    u64 ticks = 0;

    if (count < sizeof(ticks))
        return -EINVAL;
    spin_lock_irq(&ctx->wqh.lock);
    /*
     * Non-blocking files start out with -EAGAIN; the value is replaced
     * further down when ticks are actually available.
     */
    if (file->f_flags & O_NONBLOCK)
        res = -EAGAIN;
    else
        res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);

    /*
     * If clock has changed, we do not care about the
     * ticks and we do not rearm the timer. Userspace must
     * reevaluate anyway.
     */
    if (timerfd_canceled(ctx)) {
        ctx->ticks = 0;
        ctx->expired = 0;
        res = -ECANCELED;
    }

    if (ctx->ticks) {
        ticks = ctx->ticks;

        if (ctx->expired && ctx->tintv) {
            /*
             * If tintv != 0, this is a periodic timer that
             * needs to be re-armed. We avoid doing it in the timer
             * callback to avoid DoS attacks specifying a very
             * short timer period.
             */
            /*
             * The forward helpers return a count that already
             * includes the expiry recorded by the callback,
             * hence the "- 1".
             */
            if (isalarm(ctx)) {
                ticks += alarm_forward_now(
                    &ctx->t.alarm, ctx->tintv) - 1;
                alarm_restart(&ctx->t.alarm);
            } else {
                ticks += hrtimer_forward_now(&ctx->t.tmr,
                                 ctx->tintv) - 1;
                hrtimer_restart(&ctx->t.tmr);
            }
        }
        /* The count has been consumed; start over from zero. */
        ctx->expired = 0;
        ctx->ticks = 0;
    }
    spin_unlock_irq(&ctx->wqh.lock);
    /* Copy to userspace only after the spinlock has been dropped. */
    if (ticks)
        res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks);
    return res;
}
0289 
#ifdef CONFIG_PROC_FS
/*
 * ->show_fdinfo handler: print the clock id, pending tick count, settime
 * flags, remaining time and interval of the timerfd.
 *
 * Everything that the timer callback or timerfd_settime() can modify
 * (ticks, settime_flags, remaining time, interval) is sampled in a single
 * critical section under wqh.lock. That keeps the reported values
 * consistent with each other and avoids reading the 64-bit tick counter
 * while it is being updated. clockid is only written at creation time and
 * is read directly.
 */
static void timerfd_show(struct seq_file *m, struct file *file)
{
    struct timerfd_ctx *ctx = file->private_data;
    struct itimerspec t;
    unsigned long long ticks;
    unsigned int settime_flags;

    spin_lock_irq(&ctx->wqh.lock);
    t.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
    t.it_interval = ktime_to_timespec(ctx->tintv);
    ticks = ctx->ticks;
    settime_flags = ctx->settime_flags;
    spin_unlock_irq(&ctx->wqh.lock);

    seq_printf(m,
           "clockid: %d\n"
           "ticks: %llu\n"
           "settime flags: 0%o\n"
           "it_value: (%llu, %llu)\n"
           "it_interval: (%llu, %llu)\n",
           ctx->clockid,
           ticks,
           settime_flags,
           (unsigned long long)t.it_value.tv_sec,
           (unsigned long long)t.it_value.tv_nsec,
           (unsigned long long)t.it_interval.tv_sec,
           (unsigned long long)t.it_interval.tv_nsec);
}
#else
#define timerfd_show NULL
#endif
0318 
#ifdef CONFIG_CHECKPOINT_RESTORE
/*
 * ->unlocked_ioctl handler. The only supported command is
 * TFD_IOC_SET_TICKS, which overwrites the pending tick count with a
 * non-zero u64 read from userspace and wakes up waiters.
 *
 * Returns 0 on success, -ENOTTY for an unknown command, -EFAULT when the
 * argument cannot be read, -EINVAL for a zero tick count and -ECANCELED
 * when the timer has been cancelled by a clock change.
 */
static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
    struct timerfd_ctx *ctx = file->private_data;
    u64 new_ticks;
    int ret;

    if (cmd != TFD_IOC_SET_TICKS)
        return -ENOTTY;

    if (copy_from_user(&new_ticks, (u64 __user *)arg, sizeof(new_ticks)))
        return -EFAULT;
    if (new_ticks == 0)
        return -EINVAL;

    spin_lock_irq(&ctx->wqh.lock);
    if (timerfd_canceled(ctx)) {
        ret = -ECANCELED;
    } else {
        ctx->ticks = new_ticks;
        wake_up_locked(&ctx->wqh);
        ret = 0;
    }
    spin_unlock_irq(&ctx->wqh.lock);

    return ret;
}
#else
#define timerfd_ioctl NULL
#endif
0353 
/*
 * File operations of a timerfd. timerfd_fget() also compares a file's
 * f_op against this table to recognize timerfd descriptors.
 * timerfd_show and timerfd_ioctl are defined as NULL when their
 * respective config options are disabled.
 */
static const struct file_operations timerfd_fops = {
    .release    = timerfd_release,
    .poll       = timerfd_poll,
    .read       = timerfd_read,
    .llseek     = noop_llseek,
    .show_fdinfo    = timerfd_show,
    .unlocked_ioctl = timerfd_ioctl,
};
0362 
0363 static int timerfd_fget(int fd, struct fd *p)
0364 {
0365     struct fd f = fdget(fd);
0366     if (!f.file)
0367         return -EBADF;
0368     if (f.file->f_op != &timerfd_fops) {
0369         fdput(f);
0370         return -EINVAL;
0371     }
0372     *p = f;
0373     return 0;
0374 }
0375 
0376 SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
0377 {
0378     int ufd;
0379     struct timerfd_ctx *ctx;
0380 
0381     /* Check the TFD_* constants for consistency.  */
0382     BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
0383     BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK);
0384 
0385     if ((flags & ~TFD_CREATE_FLAGS) ||
0386         (clockid != CLOCK_MONOTONIC &&
0387          clockid != CLOCK_REALTIME &&
0388          clockid != CLOCK_REALTIME_ALARM &&
0389          clockid != CLOCK_BOOTTIME &&
0390          clockid != CLOCK_BOOTTIME_ALARM))
0391         return -EINVAL;
0392 
0393     if (!capable(CAP_WAKE_ALARM) &&
0394         (clockid == CLOCK_REALTIME_ALARM ||
0395          clockid == CLOCK_BOOTTIME_ALARM))
0396         return -EPERM;
0397 
0398     ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
0399     if (!ctx)
0400         return -ENOMEM;
0401 
0402     init_waitqueue_head(&ctx->wqh);
0403     ctx->clockid = clockid;
0404 
0405     if (isalarm(ctx))
0406         alarm_init(&ctx->t.alarm,
0407                ctx->clockid == CLOCK_REALTIME_ALARM ?
0408                ALARM_REALTIME : ALARM_BOOTTIME,
0409                timerfd_alarmproc);
0410     else
0411         hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS);
0412 
0413     ctx->moffs = ktime_mono_to_real(0);
0414 
0415     ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
0416                    O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
0417     if (ufd < 0)
0418         kfree(ctx);
0419 
0420     return ufd;
0421 }
0422 
0423 static int do_timerfd_settime(int ufd, int flags, 
0424         const struct itimerspec *new,
0425         struct itimerspec *old)
0426 {
0427     struct fd f;
0428     struct timerfd_ctx *ctx;
0429     int ret;
0430 
0431     if ((flags & ~TFD_SETTIME_FLAGS) ||
0432         !timespec_valid(&new->it_value) ||
0433         !timespec_valid(&new->it_interval))
0434         return -EINVAL;
0435 
0436     ret = timerfd_fget(ufd, &f);
0437     if (ret)
0438         return ret;
0439     ctx = f.file->private_data;
0440 
0441     if (!capable(CAP_WAKE_ALARM) && isalarm(ctx)) {
0442         fdput(f);
0443         return -EPERM;
0444     }
0445 
0446     timerfd_setup_cancel(ctx, flags);
0447 
0448     /*
0449      * We need to stop the existing timer before reprogramming
0450      * it to the new values.
0451      */
0452     for (;;) {
0453         spin_lock_irq(&ctx->wqh.lock);
0454 
0455         if (isalarm(ctx)) {
0456             if (alarm_try_to_cancel(&ctx->t.alarm) >= 0)
0457                 break;
0458         } else {
0459             if (hrtimer_try_to_cancel(&ctx->t.tmr) >= 0)
0460                 break;
0461         }
0462         spin_unlock_irq(&ctx->wqh.lock);
0463         cpu_relax();
0464     }
0465 
0466     /*
0467      * If the timer is expired and it's periodic, we need to advance it
0468      * because the caller may want to know the previous expiration time.
0469      * We do not update "ticks" and "expired" since the timer will be
0470      * re-programmed again in the following timerfd_setup() call.
0471      */
0472     if (ctx->expired && ctx->tintv) {
0473         if (isalarm(ctx))
0474             alarm_forward_now(&ctx->t.alarm, ctx->tintv);
0475         else
0476             hrtimer_forward_now(&ctx->t.tmr, ctx->tintv);
0477     }
0478 
0479     old->it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
0480     old->it_interval = ktime_to_timespec(ctx->tintv);
0481 
0482     /*
0483      * Re-program the timer to the new value ...
0484      */
0485     ret = timerfd_setup(ctx, flags, new);
0486 
0487     spin_unlock_irq(&ctx->wqh.lock);
0488     fdput(f);
0489     return ret;
0490 }
0491 
0492 static int do_timerfd_gettime(int ufd, struct itimerspec *t)
0493 {
0494     struct fd f;
0495     struct timerfd_ctx *ctx;
0496     int ret = timerfd_fget(ufd, &f);
0497     if (ret)
0498         return ret;
0499     ctx = f.file->private_data;
0500 
0501     spin_lock_irq(&ctx->wqh.lock);
0502     if (ctx->expired && ctx->tintv) {
0503         ctx->expired = 0;
0504 
0505         if (isalarm(ctx)) {
0506             ctx->ticks +=
0507                 alarm_forward_now(
0508                     &ctx->t.alarm, ctx->tintv) - 1;
0509             alarm_restart(&ctx->t.alarm);
0510         } else {
0511             ctx->ticks +=
0512                 hrtimer_forward_now(&ctx->t.tmr, ctx->tintv)
0513                 - 1;
0514             hrtimer_restart(&ctx->t.tmr);
0515         }
0516     }
0517     t->it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
0518     t->it_interval = ktime_to_timespec(ctx->tintv);
0519     spin_unlock_irq(&ctx->wqh.lock);
0520     fdput(f);
0521     return 0;
0522 }
0523 
0524 SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
0525         const struct itimerspec __user *, utmr,
0526         struct itimerspec __user *, otmr)
0527 {
0528     struct itimerspec new, old;
0529     int ret;
0530 
0531     if (copy_from_user(&new, utmr, sizeof(new)))
0532         return -EFAULT;
0533     ret = do_timerfd_settime(ufd, flags, &new, &old);
0534     if (ret)
0535         return ret;
0536     if (otmr && copy_to_user(otmr, &old, sizeof(old)))
0537         return -EFAULT;
0538 
0539     return ret;
0540 }
0541 
0542 SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
0543 {
0544     struct itimerspec kotmr;
0545     int ret = do_timerfd_gettime(ufd, &kotmr);
0546     if (ret)
0547         return ret;
0548     return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0;
0549 }
0550 
0551 #ifdef CONFIG_COMPAT
0552 COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
0553         const struct compat_itimerspec __user *, utmr,
0554         struct compat_itimerspec __user *, otmr)
0555 {
0556     struct itimerspec new, old;
0557     int ret;
0558 
0559     if (get_compat_itimerspec(&new, utmr))
0560         return -EFAULT;
0561     ret = do_timerfd_settime(ufd, flags, &new, &old);
0562     if (ret)
0563         return ret;
0564     if (otmr && put_compat_itimerspec(otmr, &old))
0565         return -EFAULT;
0566     return ret;
0567 }
0568 
0569 COMPAT_SYSCALL_DEFINE2(timerfd_gettime, int, ufd,
0570         struct compat_itimerspec __user *, otmr)
0571 {
0572     struct itimerspec kotmr;
0573     int ret = do_timerfd_gettime(ufd, &kotmr);
0574     if (ret)
0575         return ret;
0576     return put_compat_itimerspec(otmr, &kotmr) ? -EFAULT: 0;
0577 }
0578 #endif