0001
0002
0003
0004
0005
0006
0007
0008
0009 #include <linux/file.h>
0010 #include <linux/poll.h>
0011 #include <linux/init.h>
0012 #include <linux/fs.h>
0013 #include <linux/sched/signal.h>
0014 #include <linux/kernel.h>
0015 #include <linux/slab.h>
0016 #include <linux/list.h>
0017 #include <linux/spinlock.h>
0018 #include <linux/anon_inodes.h>
0019 #include <linux/syscalls.h>
0020 #include <linux/export.h>
0021 #include <linux/kref.h>
0022 #include <linux/eventfd.h>
0023 #include <linux/proc_fs.h>
0024 #include <linux/seq_file.h>
0025 #include <linux/idr.h>
0026 #include <linux/uio.h>
0027
/*
 * ID allocator for eventfd contexts. An ID is requested in do_eventfd()
 * and stored in ctx->id; eventfd_free_ctx() returns it when id >= 0.
 */
static DEFINE_IDA(eventfd_ida);
0029
/*
 * Per-eventfd state, shared between the file and any in-kernel users that
 * obtained a reference through eventfd_ctx_fdget()/eventfd_ctx_fileget().
 */
struct eventfd_ctx {
	/* Reference count; the context is freed by eventfd_free() at zero. */
	struct kref kref;
	/*
	 * Waiters for readability/writability. Throughout this file "count"
	 * is modified only with wqh.lock held.
	 */
	wait_queue_head_t wqh;
	/*
	 * The eventfd counter. eventfd_write() and eventfd_signal() add to
	 * it and wake waiters with EPOLLIN; eventfd_ctx_do_read() subtracts
	 * the value handed back to the reader.
	 */
	__u64 count;
	/* EFD_* flags as passed at creation (EFD_SEMAPHORE is tested on read). */
	unsigned int flags;
	/* Value returned by the ID allocator; only released when >= 0. */
	int id;
};
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060 __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
0061 {
0062 unsigned long flags;
0063
0064
0065
0066
0067
0068
0069
0070
0071
0072 if (WARN_ON_ONCE(current->in_eventfd_signal))
0073 return 0;
0074
0075 spin_lock_irqsave(&ctx->wqh.lock, flags);
0076 current->in_eventfd_signal = 1;
0077 if (ULLONG_MAX - ctx->count < n)
0078 n = ULLONG_MAX - ctx->count;
0079 ctx->count += n;
0080 if (waitqueue_active(&ctx->wqh))
0081 wake_up_locked_poll(&ctx->wqh, EPOLLIN);
0082 current->in_eventfd_signal = 0;
0083 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
0084
0085 return n;
0086 }
0087 EXPORT_SYMBOL_GPL(eventfd_signal);
0088
0089 static void eventfd_free_ctx(struct eventfd_ctx *ctx)
0090 {
0091 if (ctx->id >= 0)
0092 ida_simple_remove(&eventfd_ida, ctx->id);
0093 kfree(ctx);
0094 }
0095
0096 static void eventfd_free(struct kref *kref)
0097 {
0098 struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref);
0099
0100 eventfd_free_ctx(ctx);
0101 }
0102
0103
0104
0105
0106
0107
0108
0109
/**
 * eventfd_ctx_put - drop a reference to an eventfd context
 * @ctx: context whose reference is released
 *
 * Decrements the context's kref; eventfd_free() runs when the last
 * reference goes away.
 */
void eventfd_ctx_put(struct eventfd_ctx *ctx)
{
	kref_put(&ctx->kref, eventfd_free);
}
EXPORT_SYMBOL_GPL(eventfd_ctx_put);
0115
0116 static int eventfd_release(struct inode *inode, struct file *file)
0117 {
0118 struct eventfd_ctx *ctx = file->private_data;
0119
0120 wake_up_poll(&ctx->wqh, EPOLLHUP);
0121 eventfd_ctx_put(ctx);
0122 return 0;
0123 }
0124
0125 static __poll_t eventfd_poll(struct file *file, poll_table *wait)
0126 {
0127 struct eventfd_ctx *ctx = file->private_data;
0128 __poll_t events = 0;
0129 u64 count;
0130
0131 poll_wait(file, &ctx->wqh, wait);
0132
0133
0134
0135
0136
0137
0138
0139
0140
0141
0142
0143
0144
0145
0146
0147
0148
0149
0150
0151
0152
0153
0154
0155
0156
0157
0158
0159
0160
0161
0162
0163
0164
0165
0166
0167
0168
0169
0170
0171 count = READ_ONCE(ctx->count);
0172
0173 if (count > 0)
0174 events |= EPOLLIN;
0175 if (count == ULLONG_MAX)
0176 events |= EPOLLERR;
0177 if (ULLONG_MAX - 1 > count)
0178 events |= EPOLLOUT;
0179
0180 return events;
0181 }
0182
0183 void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
0184 {
0185 lockdep_assert_held(&ctx->wqh.lock);
0186
0187 *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
0188 ctx->count -= *cnt;
0189 }
0190 EXPORT_SYMBOL_GPL(eventfd_ctx_do_read);
0191
0192
0193
0194
0195
0196
0197
0198
0199
0200
0201
0202
0203
0204
0205 int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
0206 __u64 *cnt)
0207 {
0208 unsigned long flags;
0209
0210 spin_lock_irqsave(&ctx->wqh.lock, flags);
0211 eventfd_ctx_do_read(ctx, cnt);
0212 __remove_wait_queue(&ctx->wqh, wait);
0213 if (*cnt != 0 && waitqueue_active(&ctx->wqh))
0214 wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
0215 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
0216
0217 return *cnt != 0 ? 0 : -EAGAIN;
0218 }
0219 EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
0220
/*
 * eventfd_read - ->read_iter handler: consume the counter and copy it out.
 * @iocb: I/O control block; supplies the file and the IOCB_NOWAIT flag
 * @to: destination iterator; must have room for at least sizeof(__u64)
 *
 * Returns sizeof(__u64) on success, -EINVAL if @to is too small, -EAGAIN
 * if the counter is zero and the call is non-blocking (O_NONBLOCK or
 * IOCB_NOWAIT), -ERESTARTSYS if a signal is pending while waiting, or
 * -EFAULT if the value could not be fully copied to @to. Note that in the
 * -EFAULT case the counter has already been consumed.
 */
static ssize_t eventfd_read(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct eventfd_ctx *ctx = file->private_data;
	__u64 ucnt = 0;
	DECLARE_WAITQUEUE(wait, current);

	if (iov_iter_count(to) < sizeof(ucnt))
		return -EINVAL;
	spin_lock_irq(&ctx->wqh.lock);
	if (!ctx->count) {
		/* Nothing to read: fail fast for non-blocking callers. */
		if ((file->f_flags & O_NONBLOCK) ||
		    (iocb->ki_flags & IOCB_NOWAIT)) {
			spin_unlock_irq(&ctx->wqh.lock);
			return -EAGAIN;
		}
		/*
		 * Sleep until the counter becomes non-zero. The entry is
		 * queued while wqh.lock is held; the lock is dropped only
		 * around schedule() and re-taken before re-checking.
		 */
		__add_wait_queue(&ctx->wqh, &wait);
		for (;;) {
			/* Task state is set before the condition is tested. */
			set_current_state(TASK_INTERRUPTIBLE);
			if (ctx->count)
				break;
			if (signal_pending(current)) {
				/* Undo the wait setup and bail out unlocked. */
				__remove_wait_queue(&ctx->wqh, &wait);
				__set_current_state(TASK_RUNNING);
				spin_unlock_irq(&ctx->wqh.lock);
				return -ERESTARTSYS;
			}
			spin_unlock_irq(&ctx->wqh.lock);
			schedule();
			spin_lock_irq(&ctx->wqh.lock);
		}
		__remove_wait_queue(&ctx->wqh, &wait);
		__set_current_state(TASK_RUNNING);
	}
	/* Lock is held here; take the value and let blocked writers know. */
	eventfd_ctx_do_read(ctx, &ucnt);
	if (waitqueue_active(&ctx->wqh))
		wake_up_locked_poll(&ctx->wqh, EPOLLOUT);
	spin_unlock_irq(&ctx->wqh.lock);
	/* Copy out after dropping the lock. */
	if (unlikely(copy_to_iter(&ucnt, sizeof(ucnt), to) != sizeof(ucnt)))
		return -EFAULT;

	return sizeof(ucnt);
}
0264
/*
 * eventfd_write - ->write handler: add a user-supplied value to the counter.
 * @file: the eventfd file
 * @buf: user buffer holding a __u64 to add
 * @count: size of @buf; must be at least sizeof(__u64)
 * @ppos: file position (not used by this function)
 *
 * The value ULLONG_MAX is rejected. The addition is performed only when
 * ULLONG_MAX - counter > value; otherwise the call waits for that to
 * become true unless O_NONBLOCK is set.
 *
 * Returns sizeof(__u64) on success, -EINVAL for a short buffer or a value
 * of ULLONG_MAX, -EFAULT if @buf cannot be read, -EAGAIN if the value does
 * not fit and the file is non-blocking, or -ERESTARTSYS if a signal is
 * pending while waiting.
 */
static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
			     loff_t *ppos)
{
	struct eventfd_ctx *ctx = file->private_data;
	ssize_t res;
	__u64 ucnt;
	DECLARE_WAITQUEUE(wait, current);

	if (count < sizeof(ucnt))
		return -EINVAL;
	if (copy_from_user(&ucnt, buf, sizeof(ucnt)))
		return -EFAULT;
	if (ucnt == ULLONG_MAX)
		return -EINVAL;
	spin_lock_irq(&ctx->wqh.lock);
	/* Default result if the value does not fit and we may not block. */
	res = -EAGAIN;
	if (ULLONG_MAX - ctx->count > ucnt)
		res = sizeof(ucnt);
	else if (!(file->f_flags & O_NONBLOCK)) {
		/*
		 * Wait for enough room. The entry is queued while wqh.lock
		 * is held; the lock is dropped only around schedule().
		 */
		__add_wait_queue(&ctx->wqh, &wait);
		for (res = 0;;) {
			/* Task state is set before the condition is tested. */
			set_current_state(TASK_INTERRUPTIBLE);
			if (ULLONG_MAX - ctx->count > ucnt) {
				res = sizeof(ucnt);
				break;
			}
			if (signal_pending(current)) {
				res = -ERESTARTSYS;
				break;
			}
			spin_unlock_irq(&ctx->wqh.lock);
			schedule();
			spin_lock_irq(&ctx->wqh.lock);
		}
		__remove_wait_queue(&ctx->wqh, &wait);
		__set_current_state(TASK_RUNNING);
	}
	/* res > 0 means the value fits: apply it and wake readers. */
	if (likely(res > 0)) {
		ctx->count += ucnt;
		if (waitqueue_active(&ctx->wqh))
			wake_up_locked_poll(&ctx->wqh, EPOLLIN);
	}
	spin_unlock_irq(&ctx->wqh.lock);

	return res;
}
0311
#ifdef CONFIG_PROC_FS
/*
 * eventfd_show_fdinfo - ->show_fdinfo handler.
 * @m: seq_file to print into
 * @f: the eventfd file being described
 *
 * Prints the current counter (hex) as "eventfd-count" and the context ID
 * as "eventfd-id".
 */
static void eventfd_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct eventfd_ctx *ctx = f->private_data;
	__u64 cnt;

	/*
	 * Take a consistent snapshot under the lock, but do the formatting
	 * afterwards so no output is generated with the spinlock held and
	 * interrupts disabled.
	 */
	spin_lock_irq(&ctx->wqh.lock);
	cnt = ctx->count;
	spin_unlock_irq(&ctx->wqh.lock);

	seq_printf(m, "eventfd-count: %16llx\n",
		   (unsigned long long)cnt);
	seq_printf(m, "eventfd-id: %d\n", ctx->id);
}
#endif
0324
/*
 * File operations for eventfd files. The address of this table is also
 * used by eventfd_fget() and eventfd_ctx_fileget() to recognise whether a
 * given struct file is an eventfd.
 */
static const struct file_operations eventfd_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= eventfd_show_fdinfo,
#endif
	.release	= eventfd_release,
	.poll		= eventfd_poll,
	.read_iter	= eventfd_read,
	.write		= eventfd_write,
	.llseek		= noop_llseek,
};
0335
0336
0337
0338
0339
0340
0341
0342
0343
0344
0345
0346 struct file *eventfd_fget(int fd)
0347 {
0348 struct file *file;
0349
0350 file = fget(fd);
0351 if (!file)
0352 return ERR_PTR(-EBADF);
0353 if (file->f_op != &eventfd_fops) {
0354 fput(file);
0355 return ERR_PTR(-EINVAL);
0356 }
0357
0358 return file;
0359 }
0360 EXPORT_SYMBOL_GPL(eventfd_fget);
0361
0362
0363
0364
0365
0366
0367
0368
0369
0370
0371 struct eventfd_ctx *eventfd_ctx_fdget(int fd)
0372 {
0373 struct eventfd_ctx *ctx;
0374 struct fd f = fdget(fd);
0375 if (!f.file)
0376 return ERR_PTR(-EBADF);
0377 ctx = eventfd_ctx_fileget(f.file);
0378 fdput(f);
0379 return ctx;
0380 }
0381 EXPORT_SYMBOL_GPL(eventfd_ctx_fdget);
0382
0383
0384
0385
0386
0387
0388
0389
0390
0391
0392 struct eventfd_ctx *eventfd_ctx_fileget(struct file *file)
0393 {
0394 struct eventfd_ctx *ctx;
0395
0396 if (file->f_op != &eventfd_fops)
0397 return ERR_PTR(-EINVAL);
0398
0399 ctx = file->private_data;
0400 kref_get(&ctx->kref);
0401 return ctx;
0402 }
0403 EXPORT_SYMBOL_GPL(eventfd_ctx_fileget);
0404
0405 static int do_eventfd(unsigned int count, int flags)
0406 {
0407 struct eventfd_ctx *ctx;
0408 struct file *file;
0409 int fd;
0410
0411
0412 BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
0413 BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
0414
0415 if (flags & ~EFD_FLAGS_SET)
0416 return -EINVAL;
0417
0418 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
0419 if (!ctx)
0420 return -ENOMEM;
0421
0422 kref_init(&ctx->kref);
0423 init_waitqueue_head(&ctx->wqh);
0424 ctx->count = count;
0425 ctx->flags = flags;
0426 ctx->id = ida_simple_get(&eventfd_ida, 0, 0, GFP_KERNEL);
0427
0428 flags &= EFD_SHARED_FCNTL_FLAGS;
0429 flags |= O_RDWR;
0430 fd = get_unused_fd_flags(flags);
0431 if (fd < 0)
0432 goto err;
0433
0434 file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags);
0435 if (IS_ERR(file)) {
0436 put_unused_fd(fd);
0437 fd = PTR_ERR(file);
0438 goto err;
0439 }
0440
0441 file->f_mode |= FMODE_NOWAIT;
0442 fd_install(fd, file);
0443 return fd;
0444 err:
0445 eventfd_free_ctx(ctx);
0446 return fd;
0447 }
0448
/*
 * eventfd2(count, flags): create an eventfd with an initial counter value
 * and EFD_* flags. Returns whatever do_eventfd() returns.
 */
SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
{
	return do_eventfd(count, flags);
}
0453
/*
 * eventfd(count): same as eventfd2() with flags fixed to 0.
 */
SYSCALL_DEFINE1(eventfd, unsigned int, count)
{
	return do_eventfd(count, 0);
}
0458