0001
0002
0003
0004
0005
0006
0007
0008 #include <linux/mm.h>
0009 #include <linux/file.h>
0010 #include <linux/poll.h>
0011 #include <linux/slab.h>
0012 #include <linux/module.h>
0013 #include <linux/init.h>
0014 #include <linux/fs.h>
0015 #include <linux/log2.h>
0016 #include <linux/mount.h>
0017 #include <linux/pseudo_fs.h>
0018 #include <linux/magic.h>
0019 #include <linux/pipe_fs_i.h>
0020 #include <linux/uio.h>
0021 #include <linux/highmem.h>
0022 #include <linux/pagemap.h>
0023 #include <linux/audit.h>
0024 #include <linux/syscalls.h>
0025 #include <linux/fcntl.h>
0026 #include <linux/memcontrol.h>
0027 #include <linux/watch_queue.h>
0028 #include <linux/sysctl.h>
0029
0030 #include <linux/uaccess.h>
0031 #include <asm/ioctls.h>
0032
0033 #include "internal.h"
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
/* Minimum pipe capacity (in pages) used when the soft limit is exceeded. */
#define PIPE_MIN_DEF_BUFFERS 2

/*
 * Maximum pipe capacity, in bytes, that an unprivileged user may request.
 * Tunable via the "fs/pipe-max-size" sysctl (see fs_pipe_sysctls below).
 */
static unsigned int pipe_max_size = 1048576;

/*
 * Per-user limits, in pages, on total pipe-buffer memory; a value of 0
 * disables the corresponding limit (see too_many_pipe_buffers_*()).
 * Tunable via "fs/pipe-user-pages-hard" and "fs/pipe-user-pages-soft".
 */
static unsigned long pipe_user_pages_hard;
static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
0061
0062
0063
0064
0065
0066
0067
0068
0069
0070
0071
0072
0073
0074
0075
0076
0077
0078
/*
 * Take the pipe mutex with the given lockdep subclass. Pipes that have
 * no associated files (pipe->files == 0) are never locked, since nothing
 * else can reach them.
 */
static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
{
	if (pipe->files)
		mutex_lock_nested(&pipe->mutex, subclass);
}
0084
void pipe_lock(struct pipe_inode_info *pipe)
{
	/*
	 * Use the I_MUTEX_PARENT lockdep subclass so that a second pipe
	 * can be nested under this one via pipe_double_lock().
	 */
	pipe_lock_nested(pipe, I_MUTEX_PARENT);
}
EXPORT_SYMBOL(pipe_lock);
0093
0094 void pipe_unlock(struct pipe_inode_info *pipe)
0095 {
0096 if (pipe->files)
0097 mutex_unlock(&pipe->mutex);
0098 }
0099 EXPORT_SYMBOL(pipe_unlock);
0100
/*
 * Unconditional lock/unlock helpers for pipes that are known to have
 * files attached (the common case inside file_operations callbacks).
 */
static inline void __pipe_lock(struct pipe_inode_info *pipe)
{
	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
}

static inline void __pipe_unlock(struct pipe_inode_info *pipe)
{
	mutex_unlock(&pipe->mutex);
}
0110
0111 void pipe_double_lock(struct pipe_inode_info *pipe1,
0112 struct pipe_inode_info *pipe2)
0113 {
0114 BUG_ON(pipe1 == pipe2);
0115
0116 if (pipe1 < pipe2) {
0117 pipe_lock_nested(pipe1, I_MUTEX_PARENT);
0118 pipe_lock_nested(pipe2, I_MUTEX_CHILD);
0119 } else {
0120 pipe_lock_nested(pipe2, I_MUTEX_PARENT);
0121 pipe_lock_nested(pipe1, I_MUTEX_CHILD);
0122 }
0123 }
0124
/*
 * Release an anonymous pipe buffer's page. If we are the sole owner of
 * the page, cache it in pipe->tmp_page for reuse by the next write
 * instead of freeing it; otherwise just drop our reference.
 */
static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
				  struct pipe_buffer *buf)
{
	struct page *page = buf->page;

	if (page_count(page) == 1 && !pipe->tmp_page)
		pipe->tmp_page = page;
	else
		put_page(page);
}
0140
/*
 * Try to steal the page backing an anonymous pipe buffer. Only possible
 * when we hold the sole reference; on success the page's kmem charge is
 * dropped and the page is handed over locked.
 */
static bool anon_pipe_buf_try_steal(struct pipe_inode_info *pipe,
		struct pipe_buffer *buf)
{
	struct page *page = buf->page;

	if (page_count(page) != 1)
		return false;
	memcg_kmem_uncharge_page(page, 0);
	__SetPageLocked(page);
	return true;
}
0152
0153
0154
0155
0156
0157
0158
0159
0160
0161
0162
0163
0164
/**
 * generic_pipe_buf_try_steal - attempt to take ownership of a pipe buffer
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer to attempt to steal
 *
 * Attempts to steal the &struct page attached to @buf. Stealing only
 * succeeds when the page has a single reference (ours); on success the
 * page is returned locked and %true is returned.
 */
bool generic_pipe_buf_try_steal(struct pipe_inode_info *pipe,
		struct pipe_buffer *buf)
{
	struct page *page = buf->page;

	if (page_count(page) == 1) {
		lock_page(page);
		return true;
	}
	return false;
}
EXPORT_SYMBOL(generic_pipe_buf_try_steal);
0182
0183
0184
0185
0186
0187
0188
0189
0190
0191
0192
/**
 * generic_pipe_buf_get - get a reference to a pipe buffer's page
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer to get a reference to
 *
 * Returns %false if the page's refcount could not be raised (it had
 * already dropped to zero), %true otherwise.
 */
bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
{
	return try_get_page(buf->page);
}
EXPORT_SYMBOL(generic_pipe_buf_get);
0198
0199
0200
0201
0202
0203
0204
0205
0206
/**
 * generic_pipe_buf_release - put a reference to a pipe buffer's page
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer being released
 */
void generic_pipe_buf_release(struct pipe_inode_info *pipe,
			      struct pipe_buffer *buf)
{
	put_page(buf->page);
}
EXPORT_SYMBOL(generic_pipe_buf_release);

/* Buffer operations for ordinary anonymous-pipe data pages. */
static const struct pipe_buf_operations anon_pipe_buf_ops = {
	.release	= anon_pipe_buf_release,
	.try_steal	= anon_pipe_buf_try_steal,
	.get		= generic_pipe_buf_get,
};
0219
0220
/*
 * Wait-condition check done without the pipe lock held, hence the
 * READ_ONCE()s. The reads are racy, but that is harmless: a reader that
 * wakes on a stale view re-takes the lock and re-checks the ring state.
 * A pipe is readable if it has data, or if there are no writers left
 * (so a read should return 0/EOF rather than block).
 */
static inline bool pipe_readable(const struct pipe_inode_info *pipe)
{
	unsigned int head = READ_ONCE(pipe->head);
	unsigned int tail = READ_ONCE(pipe->tail);
	unsigned int writers = READ_ONCE(pipe->writers);

	return !pipe_empty(head, tail) || !writers;
}
0229
/*
 * Read from a pipe. Copies data out of the ring buffers under the pipe
 * mutex, retiring fully-consumed buffers, and sleeps (exclusively on
 * rd_wait) when the pipe is empty but still has writers.
 */
static ssize_t
pipe_read(struct kiocb *iocb, struct iov_iter *to)
{
	size_t total_len = iov_iter_count(to);
	struct file *filp = iocb->ki_filp;
	struct pipe_inode_info *pipe = filp->private_data;
	bool was_full, wake_next_reader = false;
	ssize_t ret;

	/* Null read succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	ret = 0;
	__pipe_lock(pipe);

	/*
	 * Writers are only woken if the pipe was full when we started
	 * reading; re-sampled each time we re-take the lock below.
	 */
	was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
	for (;;) {
		/*
		 * Acquire-load of ->head pairs with the producer's
		 * release-store, so the buffer contents published before
		 * the head bump are visible to us.
		 */
		unsigned int head = smp_load_acquire(&pipe->head);
		unsigned int tail = pipe->tail;
		unsigned int mask = pipe->ring_size - 1;

#ifdef CONFIG_WATCH_QUEUE
		/*
		 * If notifications were lost, emit a WATCH_META_LOSS
		 * record ahead of any buffered data.
		 */
		if (pipe->note_loss) {
			struct watch_notification n;

			if (total_len < 8) {
				if (ret == 0)
					ret = -ENOBUFS;
				break;
			}

			n.type = WATCH_TYPE_META;
			n.subtype = WATCH_META_LOSS_NOTIFICATION;
			n.info = watch_sizeof(n);
			if (copy_to_iter(&n, sizeof(n), to) != sizeof(n)) {
				if (ret == 0)
					ret = -EFAULT;
				break;
			}
			ret += sizeof(n);
			total_len -= sizeof(n);
			pipe->note_loss = false;
		}
#endif

		if (!pipe_empty(head, tail)) {
			struct pipe_buffer *buf = &pipe->bufs[tail & mask];
			size_t chars = buf->len;
			size_t written;
			int error;

			if (chars > total_len) {
				/*
				 * WHOLE buffers must be consumed in one
				 * read; a too-small read buffer is an
				 * error rather than a partial copy.
				 */
				if (buf->flags & PIPE_BUF_FLAG_WHOLE) {
					if (ret == 0)
						ret = -ENOBUFS;
					break;
				}
				chars = total_len;
			}

			error = pipe_buf_confirm(pipe, buf);
			if (error) {
				if (!ret)
					ret = error;
				break;
			}

			written = copy_page_to_iter(buf->page, buf->offset, chars, to);
			if (unlikely(written < chars)) {
				if (!ret)
					ret = -EFAULT;
				break;
			}
			ret += chars;
			buf->offset += chars;
			buf->len -= chars;

			/*
			 * A packet buffer is consumed whole: discard any
			 * remainder and terminate the read here.
			 */
			if (buf->flags & PIPE_BUF_FLAG_PACKET) {
				total_len = chars;
				buf->len = 0;
			}

			if (!buf->len) {
				/*
				 * Retire the buffer. The tail update is
				 * done under rd_wait.lock, which pipe
				 * producers that bypass the mutex (watch
				 * queue post) also take.
				 */
				pipe_buf_release(pipe, buf);
				spin_lock_irq(&pipe->rd_wait.lock);
#ifdef CONFIG_WATCH_QUEUE
				if (buf->flags & PIPE_BUF_FLAG_LOSS)
					pipe->note_loss = true;
#endif
				tail++;
				pipe->tail = tail;
				spin_unlock_irq(&pipe->rd_wait.lock);
			}
			total_len -= chars;
			if (!total_len)
				break;	/* common path: read fully satisfied */
			if (!pipe_empty(head, tail))	/* more buffered data? */
				continue;
		}

		/* Pipe empty: EOF if no writers remain. */
		if (!pipe->writers)
			break;
		/* Return a partial read rather than blocking. */
		if (ret)
			break;
		if (filp->f_flags & O_NONBLOCK) {
			ret = -EAGAIN;
			break;
		}
		__pipe_unlock(pipe);

		/*
		 * Before sleeping, wake writers if the pipe was full when
		 * we began — we have made space — so progress continues
		 * while we wait. Done after dropping the mutex to avoid
		 * waking a writer straight into a contended lock.
		 */
		if (unlikely(was_full))
			wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);

		/*
		 * Exclusive wait: only one blocked reader is woken at a
		 * time; each woken reader passes the wakeup on (see
		 * wake_next_reader below).
		 */
		if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
			return -ERESTARTSYS;

		__pipe_lock(pipe);
		was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
		wake_next_reader = true;
	}
	/* Nothing left for the next reader: don't bother waking it. */
	if (pipe_empty(pipe->head, pipe->tail))
		wake_next_reader = false;
	__pipe_unlock(pipe);

	if (was_full)
		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
	if (wake_next_reader)
		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
	kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
	if (ret > 0)
		file_accessed(filp);
	return ret;
}
0398
0399 static inline int is_packetized(struct file *file)
0400 {
0401 return (file->f_flags & O_DIRECT) != 0;
0402 }
0403
0404
/*
 * Lockless wait-condition check for writers (counterpart of
 * pipe_readable()); racy but re-verified under the lock after wakeup.
 * Writable when there is a free slot, or when all readers are gone
 * (so the write should fail with EPIPE instead of blocking).
 */
static inline bool pipe_writable(const struct pipe_inode_info *pipe)
{
	unsigned int head = READ_ONCE(pipe->head);
	unsigned int tail = READ_ONCE(pipe->tail);
	unsigned int max_usage = READ_ONCE(pipe->max_usage);

	return !pipe_full(head, tail, max_usage) ||
		!READ_ONCE(pipe->readers);
}
0414
/*
 * Write to a pipe. Tries to append to the last partially-filled buffer
 * first, then fills fresh pages a slot at a time, sleeping on wr_wait
 * when the ring is full and readers still exist.
 */
static ssize_t
pipe_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *filp = iocb->ki_filp;
	struct pipe_inode_info *pipe = filp->private_data;
	unsigned int head;
	ssize_t ret = 0;
	size_t total_len = iov_iter_count(from);
	ssize_t chars;
	bool was_empty = false;
	bool wake_next_writer = false;

	/* Null write succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	__pipe_lock(pipe);

	if (!pipe->readers) {
		send_sig(SIGPIPE, current, 0);
		ret = -EPIPE;
		goto out;
	}

#ifdef CONFIG_WATCH_QUEUE
	/* Userspace may not write into a watch-queue pipe. */
	if (pipe->watch_queue) {
		ret = -EXDEV;
		goto out;
	}
#endif

	/*
	 * If the pipe is non-empty and the write has a sub-page tail,
	 * try to merge that tail into the most recent buffer (only
	 * buffers flagged CAN_MERGE, i.e. not packet buffers).
	 */
	head = pipe->head;
	was_empty = pipe_empty(head, pipe->tail);
	chars = total_len & (PAGE_SIZE-1);
	if (chars && !was_empty) {
		unsigned int mask = pipe->ring_size - 1;
		struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
		int offset = buf->offset + buf->len;

		if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) &&
		    offset + chars <= PAGE_SIZE) {
			ret = pipe_buf_confirm(pipe, buf);
			if (ret)
				goto out;

			ret = copy_page_from_iter(buf->page, offset, chars, from);
			if (unlikely(ret < chars)) {
				ret = -EFAULT;
				goto out;
			}

			buf->len += ret;
			if (!iov_iter_count(from))
				goto out;
		}
	}

	for (;;) {
		/* Re-check: readers may disappear while we sleep below. */
		if (!pipe->readers) {
			send_sig(SIGPIPE, current, 0);
			if (!ret)
				ret = -EPIPE;
			break;
		}

		head = pipe->head;
		if (!pipe_full(head, pipe->tail, pipe->max_usage)) {
			unsigned int mask = pipe->ring_size - 1;
			struct pipe_buffer *buf = &pipe->bufs[head & mask];
			struct page *page = pipe->tmp_page;
			int copied;

			/* Reuse the cached spare page or allocate one. */
			if (!page) {
				page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
				if (unlikely(!page)) {
					ret = ret ? : -ENOMEM;
					break;
				}
				pipe->tmp_page = page;
			}

			/*
			 * Claim the slot by advancing ->head under
			 * rd_wait.lock, then re-check fullness: another
			 * producer path may have raced with us.
			 */
			spin_lock_irq(&pipe->rd_wait.lock);

			head = pipe->head;
			if (pipe_full(head, pipe->tail, pipe->max_usage)) {
				spin_unlock_irq(&pipe->rd_wait.lock);
				continue;
			}

			pipe->head = head + 1;
			spin_unlock_irq(&pipe->rd_wait.lock);

			/*
			 * Fill in the slot we own. Note the flags are
			 * assigned outright (never OR-ed in) so no stale
			 * flag bits can leak into the new buffer.
			 */
			buf = &pipe->bufs[head & mask];
			buf->page = page;
			buf->ops = &anon_pipe_buf_ops;
			buf->offset = 0;
			buf->len = 0;
			if (is_packetized(filp))
				buf->flags = PIPE_BUF_FLAG_PACKET;
			else
				buf->flags = PIPE_BUF_FLAG_CAN_MERGE;
			pipe->tmp_page = NULL;

			copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
			if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
				if (!ret)
					ret = -EFAULT;
				break;
			}
			ret += copied;
			buf->offset = 0;
			buf->len = copied;

			if (!iov_iter_count(from))
				break;
		}

		if (!pipe_full(head, pipe->tail, pipe->max_usage))
			continue;

		/* Ring full: bail out or wait for a reader to drain it. */
		if (filp->f_flags & O_NONBLOCK) {
			if (!ret)
				ret = -EAGAIN;
			break;
		}
		if (signal_pending(current)) {
			if (!ret)
				ret = -ERESTARTSYS;
			break;
		}

		/*
		 * Drop the lock and, if we filled a previously-empty
		 * pipe, wake readers before sleeping so they can make
		 * room for us.
		 */
		__pipe_unlock(pipe);
		if (was_empty)
			wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
		wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
		__pipe_lock(pipe);
		was_empty = pipe_empty(pipe->head, pipe->tail);
		wake_next_writer = true;
	}
out:
	/* No room left: don't pass the wakeup on to the next writer. */
	if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
		wake_next_writer = false;
	__pipe_unlock(pipe);

	/*
	 * Wake readers if the pipe transitioned from empty, or
	 * unconditionally once poll() has been used on this pipe —
	 * epoll edge-trigger semantics need a wakeup per write.
	 */
	if (was_empty || pipe->poll_usage)
		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
	kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
	if (wake_next_writer)
		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
	if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
		int err = file_update_time(filp);
		if (err)
			ret = err;
		sb_end_write(file_inode(filp)->i_sb);
	}
	return ret;
}
0606
/*
 * Pipe ioctls: FIONREAD reports the number of buffered bytes; the
 * watch-queue ioctls configure notification pipes.
 */
static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	struct pipe_inode_info *pipe = filp->private_data;
	unsigned int count, head, tail, mask;

	switch (cmd) {
	case FIONREAD:
		/* Sum the lengths of all occupied slots under the lock. */
		__pipe_lock(pipe);
		count = 0;
		head = pipe->head;
		tail = pipe->tail;
		mask = pipe->ring_size - 1;

		while (tail != head) {
			count += pipe->bufs[tail & mask].len;
			tail++;
		}
		__pipe_unlock(pipe);

		return put_user(count, (int __user *)arg);

#ifdef CONFIG_WATCH_QUEUE
	case IOC_WATCH_QUEUE_SET_SIZE: {
		int ret;
		__pipe_lock(pipe);
		ret = watch_queue_set_size(pipe, arg);
		__pipe_unlock(pipe);
		return ret;
	}

	case IOC_WATCH_QUEUE_SET_FILTER:
		return watch_queue_set_filter(
			pipe, (struct watch_notification_filter __user *)arg);
#endif

	default:
		return -ENOIOCTLCMD;
	}
}
0646
0647
/* No kernel lock held - fine */
static __poll_t
pipe_poll(struct file *filp, poll_table *wait)
{
	__poll_t mask;
	struct pipe_inode_info *pipe = filp->private_data;
	unsigned int head, tail;

	/*
	 * Record that poll has been used on this pipe; pipe_write()
	 * consults this to decide whether every write needs a reader
	 * wakeup (for epoll semantics).
	 */
	WRITE_ONCE(pipe->poll_usage, true);

	/* Register on the wait queues relevant to our open mode. */
	if (filp->f_mode & FMODE_READ)
		poll_wait(filp, &pipe->rd_wait, wait);
	if (filp->f_mode & FMODE_WRITE)
		poll_wait(filp, &pipe->wr_wait, wait);

	/*
	 * Snapshot head/tail locklessly; poll results are inherently
	 * racy and the caller must re-poll anyway.
	 */
	head = READ_ONCE(pipe->head);
	tail = READ_ONCE(pipe->tail);

	mask = 0;
	if (filp->f_mode & FMODE_READ) {
		if (!pipe_empty(head, tail))
			mask |= EPOLLIN | EPOLLRDNORM;
		/*
		 * f_version vs w_counter distinguishes "writer went away"
		 * from "no writer has appeared yet" on a FIFO opened
		 * O_NONBLOCK (see fifo_open()).
		 */
		if (!pipe->writers && filp->f_version != pipe->w_counter)
			mask |= EPOLLHUP;
	}

	if (filp->f_mode & FMODE_WRITE) {
		if (!pipe_full(head, tail, pipe->max_usage))
			mask |= EPOLLOUT | EPOLLWRNORM;
		/* Writers see EPOLLERR when all readers are gone. */
		if (!pipe->readers)
			mask |= EPOLLERR;
	}

	return mask;
}
0698
0699 static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
0700 {
0701 int kill = 0;
0702
0703 spin_lock(&inode->i_lock);
0704 if (!--pipe->files) {
0705 inode->i_pipe = NULL;
0706 kill = 1;
0707 }
0708 spin_unlock(&inode->i_lock);
0709
0710 if (kill)
0711 free_pipe_info(pipe);
0712 }
0713
/*
 * Close one end of a pipe. Decrements the reader/writer count matching
 * the file's mode and, if one side has fully hung up while the other
 * persists, wakes everyone so blocked peers observe the hangup.
 */
static int
pipe_release(struct inode *inode, struct file *file)
{
	struct pipe_inode_info *pipe = file->private_data;

	__pipe_lock(pipe);
	if (file->f_mode & FMODE_READ)
		pipe->readers--;
	if (file->f_mode & FMODE_WRITE)
		pipe->writers--;

	/* Exactly one side is gone (readers xor writers is zero). */
	if (!pipe->readers != !pipe->writers) {
		wake_up_interruptible_all(&pipe->rd_wait);
		wake_up_interruptible_all(&pipe->wr_wait);
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
	}
	__pipe_unlock(pipe);

	put_pipe_info(inode, pipe);
	return 0;
}
0737
/*
 * Set up or tear down SIGIO delivery on the pipe for the fasync lists
 * matching the file's access mode. If registering on the writer list
 * fails after the reader list succeeded, roll the reader entry back so
 * we never leave a half-registered state.
 */
static int
pipe_fasync(int fd, struct file *filp, int on)
{
	struct pipe_inode_info *pipe = filp->private_data;
	int retval = 0;

	__pipe_lock(pipe);
	if (filp->f_mode & FMODE_READ)
		retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
	if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
		retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
		if (retval < 0 && (filp->f_mode & FMODE_READ))
			/* this can happen only if on == T */
			fasync_helper(-1, filp, 0, &pipe->fasync_readers);
	}
	__pipe_unlock(pipe);
	return retval;
}
0756
/*
 * Adjust @user's accounted pipe-buffer page count by (new - old) and
 * return the resulting total, for comparison against the soft/hard
 * limits.
 */
unsigned long account_pipe_buffers(struct user_struct *user,
				   unsigned long old, unsigned long new)
{
	return atomic_long_add_return(new - old, &user->pipe_bufs);
}
0762
0763 bool too_many_pipe_buffers_soft(unsigned long user_bufs)
0764 {
0765 unsigned long soft_limit = READ_ONCE(pipe_user_pages_soft);
0766
0767 return soft_limit && user_bufs > soft_limit;
0768 }
0769
0770 bool too_many_pipe_buffers_hard(unsigned long user_bufs)
0771 {
0772 unsigned long hard_limit = READ_ONCE(pipe_user_pages_hard);
0773
0774 return hard_limit && user_bufs > hard_limit;
0775 }
0776
/* True when the caller lacks both capabilities that bypass pipe limits. */
bool pipe_is_unprivileged_user(void)
{
	return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
}
0781
/*
 * Allocate and initialize a pipe_inode_info with a default-sized ring,
 * charging the pages to the current user. Returns NULL on allocation
 * failure or when the user is over the hard limit; the result is freed
 * with free_pipe_info().
 */
struct pipe_inode_info *alloc_pipe_info(void)
{
	struct pipe_inode_info *pipe;
	unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
	struct user_struct *user = get_current_user();
	unsigned long user_bufs;
	unsigned int max_size = READ_ONCE(pipe_max_size);

	pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT);
	if (pipe == NULL)
		goto out_free_uid;

	/* Unprivileged users cannot start above pipe-max-size. */
	if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE))
		pipe_bufs = max_size >> PAGE_SHIFT;

	user_bufs = account_pipe_buffers(user, 0, pipe_bufs);

	/* Over the soft limit: fall back to a minimal-capacity pipe. */
	if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) {
		user_bufs = account_pipe_buffers(user, pipe_bufs, PIPE_MIN_DEF_BUFFERS);
		pipe_bufs = PIPE_MIN_DEF_BUFFERS;
	}

	/* Over the hard limit: refuse to create the pipe at all. */
	if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user())
		goto out_revert_acct;

	pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer),
			     GFP_KERNEL_ACCOUNT);

	if (pipe->bufs) {
		init_waitqueue_head(&pipe->rd_wait);
		init_waitqueue_head(&pipe->wr_wait);
		pipe->r_counter = pipe->w_counter = 1;
		pipe->max_usage = pipe_bufs;
		pipe->ring_size = pipe_bufs;
		pipe->nr_accounted = pipe_bufs;
		pipe->user = user;
		mutex_init(&pipe->mutex);
		return pipe;
	}

out_revert_acct:
	(void) account_pipe_buffers(user, pipe_bufs, 0);
	kfree(pipe);
out_free_uid:
	free_uid(user);
	return NULL;
}
0829
/*
 * Tear down a pipe: clear any watch queue, return the accounted pages
 * to the user, release every live buffer and the cached spare page,
 * then free the ring and the pipe itself.
 */
void free_pipe_info(struct pipe_inode_info *pipe)
{
	unsigned int i;

#ifdef CONFIG_WATCH_QUEUE
	if (pipe->watch_queue)
		watch_queue_clear(pipe->watch_queue);
#endif

	(void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0);
	free_uid(pipe->user);
	for (i = 0; i < pipe->ring_size; i++) {
		struct pipe_buffer *buf = pipe->bufs + i;
		/* Only occupied slots have ops set. */
		if (buf->ops)
			pipe_buf_release(pipe, buf);
	}
#ifdef CONFIG_WATCH_QUEUE
	if (pipe->watch_queue)
		put_watch_queue(pipe->watch_queue);
#endif
	if (pipe->tmp_page)
		__free_page(pipe->tmp_page);
	kfree(pipe->bufs);
	kfree(pipe);
}
0855
0856 static struct vfsmount *pipe_mnt __read_mostly;
0857
0858
0859
0860
/*
 * pipefs dentries have no real names; synthesize "pipe:[<ino>]" as seen
 * in /proc/<pid>/fd symlinks.
 */
static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
{
	return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
				d_inode(dentry)->i_ino);
}

static const struct dentry_operations pipefs_dentry_operations = {
	.d_dname	= pipefs_dname,
};
0870
/*
 * Allocate a pipefs inode carrying a fresh pipe, pre-set for one reader
 * and one writer (the pair created by pipe(2)). Returns NULL on failure.
 */
static struct inode * get_pipe_inode(void)
{
	struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
	struct pipe_inode_info *pipe;

	if (!inode)
		goto fail_inode;

	inode->i_ino = get_next_ino();

	pipe = alloc_pipe_info();
	if (!pipe)
		goto fail_iput;

	inode->i_pipe = pipe;
	/* One file reference per end (read side + write side). */
	pipe->files = 2;
	pipe->readers = pipe->writers = 1;
	inode->i_fop = &pipefifo_fops;

	/*
	 * Mark the inode dirty from the very beginning, so it never
	 * transitions clean->dirty and is never written back; pipes
	 * have no backing store.
	 */
	inode->i_state = I_DIRTY;
	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
	inode->i_uid = current_fsuid();
	inode->i_gid = current_fsgid();
	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);

	return inode;

fail_iput:
	iput(inode);

fail_inode:
	return NULL;
}
0910
/*
 * Create the two struct files for a new pipe: res[0] is the read end,
 * res[1] the write end. O_NOTIFICATION_PIPE attaches a watch queue.
 * On failure all intermediate state (pipe, inode, files) is unwound.
 */
int create_pipe_files(struct file **res, int flags)
{
	struct inode *inode = get_pipe_inode();
	struct file *f;
	int error;

	if (!inode)
		return -ENFILE;

	if (flags & O_NOTIFICATION_PIPE) {
		error = watch_queue_init(inode->i_pipe);
		if (error) {
			free_pipe_info(inode->i_pipe);
			iput(inode);
			return error;
		}
	}

	/* Write end: only O_NONBLOCK and O_DIRECT carry over. */
	f = alloc_file_pseudo(inode, pipe_mnt, "",
				O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
				&pipefifo_fops);
	if (IS_ERR(f)) {
		free_pipe_info(inode->i_pipe);
		iput(inode);
		return PTR_ERR(f);
	}

	f->private_data = inode->i_pipe;

	/* Read end shares the same inode via alloc_file_clone(). */
	res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
				  &pipefifo_fops);
	if (IS_ERR(res[0])) {
		put_pipe_info(inode, inode->i_pipe);
		fput(f);
		return PTR_ERR(res[0]);
	}
	res[0]->private_data = inode->i_pipe;
	res[1] = f;
	stream_open(inode, res[0]);
	stream_open(inode, res[1]);
	return 0;
}
0953
/*
 * Common pipe(2)/pipe2(2) helper: create the file pair and reserve two
 * file descriptors for them, without installing the descriptors yet
 * (callers install only after any copy-out to userspace has succeeded).
 */
static int __do_pipe_flags(int *fd, struct file **files, int flags)
{
	int error;
	int fdw, fdr;

	if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT | O_NOTIFICATION_PIPE))
		return -EINVAL;

	error = create_pipe_files(files, flags);
	if (error)
		return error;

	error = get_unused_fd_flags(flags);
	if (error < 0)
		goto err_read_pipe;
	fdr = error;

	error = get_unused_fd_flags(flags);
	if (error < 0)
		goto err_fdr;
	fdw = error;

	audit_fd_pair(fdr, fdw);
	fd[0] = fdr;
	fd[1] = fdw;
	return 0;

err_fdr:
	put_unused_fd(fdr);
err_read_pipe:
	fput(files[0]);
	fput(files[1]);
	return error;
}
0988
/*
 * In-kernel pipe creation: reserve descriptors and files via
 * __do_pipe_flags(), then install both descriptors on success.
 */
int do_pipe_flags(int *fd, int flags)
{
	struct file *files[2];
	int error;

	error = __do_pipe_flags(fd, files, flags);
	if (error)
		return error;

	fd_install(fd[0], files[0]);
	fd_install(fd[1], files[1]);
	return 0;
}
0999
1000
1001
1002
1003
/*
 * sys_pipe() is the normal C calling standard for creating
 * a pipe. It's not the way Unix traditionally does this, though.
 *
 * The descriptors are copied to userspace before being installed,
 * so that a copy_to_user() fault can be unwound completely without
 * leaking descriptors into the caller's table.
 */
static int do_pipe2(int __user *fildes, int flags)
{
	struct file *files[2];
	int fd[2];
	int error;

	error = __do_pipe_flags(fd, files, flags);
	if (!error) {
		if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
			fput(files[0]);
			fput(files[1]);
			put_unused_fd(fd[0]);
			put_unused_fd(fd[1]);
			error = -EFAULT;
		} else {
			fd_install(fd[0], files[0]);
			fd_install(fd[1], files[1]);
		}
	}
	return error;
}
1025
SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
{
	return do_pipe2(fildes, flags);
}

/* pipe(2) is pipe2(2) with no flags. */
SYSCALL_DEFINE1(pipe, int __user *, fildes)
{
	return do_pipe2(fildes, 0);
}
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
/*
 * Non-exclusive waits used by splice and friends. The pipe lock is
 * dropped across the sleep and re-taken afterwards; callers must
 * re-validate the pipe state on return.
 */
void pipe_wait_readable(struct pipe_inode_info *pipe)
{
	pipe_unlock(pipe);
	wait_event_interruptible(pipe->rd_wait, pipe_readable(pipe));
	pipe_lock(pipe);
}

void pipe_wait_writable(struct pipe_inode_info *pipe)
{
	pipe_unlock(pipe);
	wait_event_interruptible(pipe->wr_wait, pipe_writable(pipe));
	pipe_lock(pipe);
}
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
/*
 * Sleep until *cnt changes, i.e. until a partner opens the other end of
 * the FIFO (cnt is pipe->r_counter or pipe->w_counter). Called with the
 * pipe lock held; the lock is dropped while sleeping. Returns
 * -ERESTARTSYS if a signal arrived before the counter moved.
 */
static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
{
	DEFINE_WAIT(rdwait);
	int cur = *cnt;

	while (cur == *cnt) {
		prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
		pipe_unlock(pipe);
		schedule();
		finish_wait(&pipe->rd_wait, &rdwait);
		pipe_lock(pipe);
		if (signal_pending(current))
			break;
	}
	return cur == *cnt ? -ERESTARTSYS : 0;
}
1087
/* Wake anyone blocked in wait_for_partner() (which sleeps on rd_wait). */
static void wake_up_partner(struct pipe_inode_info *pipe)
{
	wake_up_interruptible_all(&pipe->rd_wait);
}
1092
/*
 * Open a FIFO (or a pipefs pipe, when re-opened via /proc). Attaches or
 * creates the inode's pipe, then applies the POSIX FIFO open semantics:
 * blocking opens wait for a partner on the other end, O_NONBLOCK opens
 * return immediately (write-only failing with ENXIO if no reader).
 */
static int fifo_open(struct inode *inode, struct file *filp)
{
	struct pipe_inode_info *pipe;
	/* pipefs pipes skip the wait-for-partner FIFO semantics. */
	bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
	int ret;

	filp->f_version = 0;

	spin_lock(&inode->i_lock);
	if (inode->i_pipe) {
		pipe = inode->i_pipe;
		pipe->files++;
		spin_unlock(&inode->i_lock);
	} else {
		/*
		 * No pipe yet: allocate one outside i_lock, then
		 * re-check for a racing open that got there first.
		 */
		spin_unlock(&inode->i_lock);
		pipe = alloc_pipe_info();
		if (!pipe)
			return -ENOMEM;
		pipe->files = 1;
		spin_lock(&inode->i_lock);
		if (unlikely(inode->i_pipe)) {
			inode->i_pipe->files++;
			spin_unlock(&inode->i_lock);
			free_pipe_info(pipe);
			pipe = inode->i_pipe;
		} else {
			inode->i_pipe = pipe;
			spin_unlock(&inode->i_lock);
		}
	}
	filp->private_data = pipe;
	/* OK, we have a pipe and it's pinned down */

	__pipe_lock(pipe);

	/* We can only do regular read/write on fifos */
	stream_open(inode, filp);

	switch (filp->f_mode & (FMODE_READ | FMODE_WRITE)) {
	case FMODE_READ:
		/*
		 * O_RDONLY: bump r_counter so waiting writers see us,
		 * wake them if we are the first reader, then (for real
		 * FIFOs) wait for a writer unless O_NONBLOCK.
		 */
		pipe->r_counter++;
		if (pipe->readers++ == 0)
			wake_up_partner(pipe);

		if (!is_pipe && !pipe->writers) {
			if ((filp->f_flags & O_NONBLOCK)) {
				/*
				 * Remember the current writer counter so
				 * pipe_poll() can tell "no writer yet"
				 * apart from "writer hung up".
				 */
				filp->f_version = pipe->w_counter;
			} else {
				if (wait_for_partner(pipe, &pipe->w_counter))
					goto err_rd;
			}
		}
		break;

	case FMODE_WRITE:
		/*
		 * O_WRONLY: POSIX mandates ENXIO for a non-blocking
		 * open of a FIFO with no readers.
		 */
		ret = -ENXIO;
		if (!is_pipe && (filp->f_flags & O_NONBLOCK) && !pipe->readers)
			goto err;

		pipe->w_counter++;
		if (!pipe->writers++)
			wake_up_partner(pipe);

		if (!is_pipe && !pipe->readers) {
			if (wait_for_partner(pipe, &pipe->r_counter))
				goto err_wr;
		}
		break;

	case FMODE_READ | FMODE_WRITE:
		/*
		 * O_RDWR: never blocks — the opener is its own partner.
		 * Wake waiters if this open supplies the first reader
		 * or the first writer.
		 */
		pipe->readers++;
		pipe->writers++;
		pipe->r_counter++;
		pipe->w_counter++;
		if (pipe->readers == 1 || pipe->writers == 1)
			wake_up_partner(pipe);
		break;

	default:
		ret = -EINVAL;
		goto err;
	}

	/* Ok! */
	__pipe_unlock(pipe);
	return 0;

err_rd:
	if (!--pipe->readers)
		wake_up_interruptible(&pipe->wr_wait);
	ret = -ERESTARTSYS;
	goto err;

err_wr:
	if (!--pipe->writers)
		wake_up_interruptible_all(&pipe->rd_wait);
	ret = -ERESTARTSYS;
	goto err;

err:
	__pipe_unlock(pipe);

	put_pipe_info(inode, pipe);
	return ret;
}
1217
/* Shared file operations for anonymous pipes and FIFOs. */
const struct file_operations pipefifo_fops = {
	.open		= fifo_open,
	.llseek		= no_llseek,	/* pipes are not seekable */
	.read_iter	= pipe_read,
	.write_iter	= pipe_write,
	.poll		= pipe_poll,
	.unlocked_ioctl	= pipe_ioctl,
	.release	= pipe_release,
	.fasync		= pipe_fasync,
	.splice_write	= iter_file_splice_write,
};
1229
1230
1231
1232
1233
/*
 * Round a requested pipe capacity (bytes) up to a power-of-two size,
 * with a floor of one page. Requests above 2 GiB yield 0 so callers
 * can reject them.
 */
unsigned int round_pipe_size(unsigned long size)
{
	if (size > (1U << 31))
		return 0;

	/* Minimum pipe size, as required by POSIX */
	if (size < PAGE_SIZE)
		return PAGE_SIZE;

	return roundup_pow_of_two(size);
}
1245
1246
1247
1248
1249
1250
1251
1252
/*
 * Resize the pipe's buffer ring to @nr_slots, preserving the buffered
 * contents. Fails with -EBUSY if the occupied slots don't fit in the
 * new size. The swap is done under rd_wait.lock, which also serializes
 * against head/tail updates made by producers and consumers.
 */
int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
{
	struct pipe_buffer *bufs;
	unsigned int head, tail, mask, n;

	bufs = kcalloc(nr_slots, sizeof(*bufs),
		       GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
	if (unlikely(!bufs))
		return -ENOMEM;

	spin_lock_irq(&pipe->rd_wait.lock);
	mask = pipe->ring_size - 1;
	head = pipe->head;
	tail = pipe->tail;

	n = pipe_occupancy(head, tail);
	if (nr_slots < n) {
		spin_unlock_irq(&pipe->rd_wait.lock);
		kfree(bufs);
		return -EBUSY;
	}

	/*
	 * Copy the occupied slots into the new ring, unwrapping them so
	 * the data starts at slot 0. Two memcpy()s are needed when the
	 * occupied region wraps around the end of the old ring.
	 */
	if (n > 0) {
		unsigned int h = head & mask;
		unsigned int t = tail & mask;
		if (h > t) {
			memcpy(bufs, pipe->bufs + t,
			       n * sizeof(struct pipe_buffer));
		} else {
			unsigned int tsize = pipe->ring_size - t;
			if (h > 0)
				memcpy(bufs + tsize, pipe->bufs,
				       h * sizeof(struct pipe_buffer));
			memcpy(bufs, pipe->bufs + t,
			       tsize * sizeof(struct pipe_buffer));
		}
	}

	/* Data now occupies slots [0, n) in the new ring. */
	head = n;
	tail = 0;

	kfree(pipe->bufs);
	pipe->bufs = bufs;
	pipe->ring_size = nr_slots;
	if (pipe->max_usage > nr_slots)
		pipe->max_usage = nr_slots;
	pipe->tail = tail;
	pipe->head = head;

	spin_unlock_irq(&pipe->rd_wait.lock);

	/* This might have made more room for writers */
	wake_up_interruptible(&pipe->wr_wait);
	return 0;
}
1312
1313
1314
1315
1316
/*
 * F_SETPIPE_SZ: change a pipe's capacity. Growing past pipe-max-size
 * requires CAP_SYS_RESOURCE; growing past the user's soft/hard limits
 * requires privilege. Returns the new capacity in bytes on success.
 */
static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
{
	unsigned long user_bufs;
	unsigned int nr_slots, size;
	long ret = 0;

#ifdef CONFIG_WATCH_QUEUE
	/* Watch-queue pipes are sized via IOC_WATCH_QUEUE_SET_SIZE. */
	if (pipe->watch_queue)
		return -EBUSY;
#endif

	size = round_pipe_size(arg);
	nr_slots = size >> PAGE_SHIFT;

	if (!nr_slots)
		return -EINVAL;

	/*
	 * Limits apply only when growing (nr_slots > max_usage), so a
	 * user can always shrink a pipe back down, even if limits were
	 * lowered after it was created.
	 */
	if (nr_slots > pipe->max_usage &&
			size > pipe_max_size && !capable(CAP_SYS_RESOURCE))
		return -EPERM;

	user_bufs = account_pipe_buffers(pipe->user, pipe->nr_accounted, nr_slots);

	if (nr_slots > pipe->max_usage &&
			(too_many_pipe_buffers_hard(user_bufs) ||
			 too_many_pipe_buffers_soft(user_bufs)) &&
			pipe_is_unprivileged_user()) {
		ret = -EPERM;
		goto out_revert_acct;
	}

	ret = pipe_resize_ring(pipe, nr_slots);
	if (ret < 0)
		goto out_revert_acct;

	pipe->max_usage = nr_slots;
	pipe->nr_accounted = nr_slots;
	return pipe->max_usage * PAGE_SIZE;

out_revert_acct:
	(void) account_pipe_buffers(pipe->user, nr_slots, pipe->nr_accounted);
	return ret;
}
1367
1368
1369
1370
1371
/*
 * Return the pipe behind @file, or NULL if the file is not a pipe.
 * With @for_splice set, watch-queue pipes are also rejected, since
 * splice may not touch them.
 */
struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice)
{
	struct pipe_inode_info *pipe = file->private_data;

	if (file->f_op != &pipefifo_fops || !pipe)
		return NULL;
#ifdef CONFIG_WATCH_QUEUE
	if (for_splice && pipe->watch_queue)
		return NULL;
#endif
	return pipe;
}
1384
/*
 * fcntl(2) entry point for F_SETPIPE_SZ / F_GETPIPE_SZ on pipe files.
 */
long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct pipe_inode_info *pipe;
	long ret;

	pipe = get_pipe_info(file, false);
	if (!pipe)
		return -EBADF;

	__pipe_lock(pipe);

	switch (cmd) {
	case F_SETPIPE_SZ:
		ret = pipe_set_size(pipe, arg);
		break;
	case F_GETPIPE_SZ:
		ret = pipe->max_usage * PAGE_SIZE;
		break;
	default:
		ret = -EINVAL;
		break;
	}

	__pipe_unlock(pipe);
	return ret;
}
1411
static const struct super_operations pipefs_ops = {
	.destroy_inode = free_inode_nonrcu,
	.statfs = simple_statfs,
};

/*
 * pipefs is the in-kernel pseudo filesystem that provides inodes for
 * anonymous pipes; it is never mounted by userspace.
 */
static int pipefs_init_fs_context(struct fs_context *fc)
{
	struct pseudo_fs_context *ctx = init_pseudo(fc, PIPEFS_MAGIC);
	if (!ctx)
		return -ENOMEM;
	ctx->ops = &pipefs_ops;
	ctx->dops = &pipefs_dentry_operations;
	return 0;
}

static struct file_system_type pipe_fs_type = {
	.name		= "pipefs",
	.init_fs_context = pipefs_init_fs_context,
	.kill_sb	= kill_anon_super,
};
1439
1440 #ifdef CONFIG_SYSCTL
1441 static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
1442 unsigned int *valp,
1443 int write, void *data)
1444 {
1445 if (write) {
1446 unsigned int val;
1447
1448 val = round_pipe_size(*lvalp);
1449 if (val == 0)
1450 return -EINVAL;
1451
1452 *valp = val;
1453 } else {
1454 unsigned int val = *valp;
1455 *lvalp = (unsigned long) val;
1456 }
1457
1458 return 0;
1459 }
1460
/* Handler for fs/pipe-max-size: douintvec with pipe-size rounding. */
static int proc_dopipe_max_size(struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	return do_proc_douintvec(table, write, buffer, lenp, ppos,
				 do_proc_dopipe_max_size_conv, NULL);
}

/* Sysctls registered under "fs/" by init_pipe_fs(). */
static struct ctl_table fs_pipe_sysctls[] = {
	{
		.procname	= "pipe-max-size",
		.data		= &pipe_max_size,
		.maxlen		= sizeof(pipe_max_size),
		.mode		= 0644,
		.proc_handler	= proc_dopipe_max_size,
	},
	{
		.procname	= "pipe-user-pages-hard",
		.data		= &pipe_user_pages_hard,
		.maxlen		= sizeof(pipe_user_pages_hard),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "pipe-user-pages-soft",
		.data		= &pipe_user_pages_soft,
		.maxlen		= sizeof(pipe_user_pages_soft),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{ }
};
1492 #endif
1493
/*
 * Register and internally mount pipefs, and register the pipe sysctls.
 * Note the sysctls are registered even if mounting pipefs failed.
 */
static int __init init_pipe_fs(void)
{
	int err = register_filesystem(&pipe_fs_type);

	if (!err) {
		pipe_mnt = kern_mount(&pipe_fs_type);
		if (IS_ERR(pipe_mnt)) {
			err = PTR_ERR(pipe_mnt);
			unregister_filesystem(&pipe_fs_type);
		}
	}
#ifdef CONFIG_SYSCTL
	register_sysctl_init("fs", fs_pipe_sysctls);
#endif
	return err;
}

fs_initcall(init_pipe_fs);
1510
1511 fs_initcall(init_pipe_fs);