#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/blk-mq.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/fsnotify.h>
#include <linux/poll.h>
#include <linux/nospec.h>
#include <linux/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "opdef.h"
#include "kbuf.h"
#include "rsrc.h"
#include "rw.h"

struct io_rw {
        /* NOTE: kiocb has the file as the first member, so don't do it twice */
        struct kiocb                    kiocb;
        u64                             addr;
        u32                             len;
        rwf_t                           flags;
};

static inline bool io_file_supports_nowait(struct io_kiocb *req)
{
        return req->flags & REQ_F_SUPPORT_NOWAIT;
}

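/*
 * Prepare a read/write request from its SQE: pick up the file offset,
 * resolve the registered buffer for READ_FIXED/WRITE_FIXED, validate the
 * requested I/O priority, and stash the user address, length and rwf flags.
 */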
int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
        struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
        unsigned ioprio;
        int ret;

        rw->kiocb.ki_pos = READ_ONCE(sqe->off);
        /* used for fixed read/write too - just read unconditionally */
        req->buf_index = READ_ONCE(sqe->buf_index);

        if (req->opcode == IORING_OP_READ_FIXED ||
            req->opcode == IORING_OP_WRITE_FIXED) {
                struct io_ring_ctx *ctx = req->ctx;
                u16 index;

                if (unlikely(req->buf_index >= ctx->nr_user_bufs))
                        return -EFAULT;
                index = array_index_nospec(req->buf_index, ctx->nr_user_bufs);
                req->imu = ctx->user_bufs[index];
                io_req_set_rsrc_node(req, ctx, 0);
        }

        ioprio = READ_ONCE(sqe->ioprio);
        if (ioprio) {
                ret = ioprio_check_cap(ioprio);
                if (ret)
                        return ret;

                rw->kiocb.ki_ioprio = ioprio;
        } else {
                rw->kiocb.ki_ioprio = get_current_ioprio();
        }

        rw->addr = READ_ONCE(sqe->addr);
        rw->len = READ_ONCE(sqe->len);
        rw->flags = READ_ONCE(sqe->rw_flags);
        return 0;
}

void io_readv_writev_cleanup(struct io_kiocb *req)
{
        struct io_async_rw *io = req->async_data;

        kfree(io->free_iovec);
}

static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
{
        switch (ret) {
        case -EIOCBQUEUED:
                break;
        case -ERESTARTSYS:
        case -ERESTARTNOINTR:
        case -ERESTARTNOHAND:
        case -ERESTART_RESTARTBLOCK:
                /*
                 * We can't just restart the syscall, since previously
                 * submitted sqes may already be in progress. Just fail
                 * this IO with EINTR.
                 */
                ret = -EINTR;
                fallthrough;
        default:
                kiocb->ki_complete(kiocb, ret);
        }
}

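/*
 * Pick the position to use for this request: an explicit offset from the
 * SQE if one was given, the file's current f_pos for a -1 offset on a
 * non-stream file, or NULL for stream files which have no position.
 */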
static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req)
{
        struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);

        if (rw->kiocb.ki_pos != -1)
                return &rw->kiocb.ki_pos;

        if (!(req->file->f_mode & FMODE_STREAM)) {
                req->flags |= REQ_F_CUR_POS;
                rw->kiocb.ki_pos = req->file->f_pos;
                return &rw->kiocb.ki_pos;
        }

        rw->kiocb.ki_pos = 0;
        return NULL;
}

static void io_req_task_queue_reissue(struct io_kiocb *req)
{
        req->io_task_work.func = io_queue_iowq;
        io_req_task_work_add(req);
}

#ifdef CONFIG_BLOCK
static bool io_resubmit_prep(struct io_kiocb *req)
{
        struct io_async_rw *io = req->async_data;

        if (!req_has_async_data(req))
                return !io_req_prep_async(req);
        iov_iter_restore(&io->s.iter, &io->s.iter_state);
        return true;
}

static bool io_rw_should_reissue(struct io_kiocb *req)
{
        umode_t mode = file_inode(req->file)->i_mode;
        struct io_ring_ctx *ctx = req->ctx;

        if (!S_ISBLK(mode) && !S_ISREG(mode))
                return false;
        if ((req->flags & REQ_F_NOWAIT) || (io_wq_current_is_worker() &&
            !(ctx->flags & IORING_SETUP_IOPOLL)))
                return false;
        /*
         * If ref is dying, we might be running poll reap from the exit work.
         * Don't attempt to reissue from that path, just let it fail with
         * -EAGAIN.
         */
        if (percpu_ref_is_dying(&ctx->refs))
                return false;
        /*
         * Play it safe and assume it's not safe to re-import and reissue if
         * we're not in the original thread group (or in task context).
         */
        if (!same_thread_group(req->task, current) || !in_task())
                return false;
        return true;
}
#else
static bool io_resubmit_prep(struct io_kiocb *req)
{
        return false;
}
static bool io_rw_should_reissue(struct io_kiocb *req)
{
        return false;
}
#endif

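/* Drop the write-freeze protection that io_write() acquired for regular files. */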
static void kiocb_end_write(struct io_kiocb *req)
{
        /*
         * Tell lockdep we inherited freeze protection from the submission
         * thread.
         */
        if (req->flags & REQ_F_ISREG) {
                struct super_block *sb = file_inode(req->file)->i_sb;

                __sb_writers_acquired(sb, SB_FREEZE_WRITE);
                sb_end_write(sb);
        }
}

static bool __io_complete_rw_common(struct io_kiocb *req, long res)
{
        struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);

        if (rw->kiocb.ki_flags & IOCB_WRITE) {
                kiocb_end_write(req);
                fsnotify_modify(req->file);
        } else {
                fsnotify_access(req->file);
        }
        if (unlikely(res != req->cqe.res)) {
                if ((res == -EAGAIN || res == -EOPNOTSUPP) &&
                    io_rw_should_reissue(req)) {
                        req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO;
                        return true;
                }
                req_set_fail(req);
                req->cqe.res = res;
        }
        return false;
}

static inline int io_fixup_rw_res(struct io_kiocb *req, long res)
{
        struct io_async_rw *io = req->async_data;

        /* add previously done IO, if any */
        if (req_has_async_data(req) && io->bytes_done > 0) {
                if (res < 0)
                        res = io->bytes_done;
                else
                        res += io->bytes_done;
        }
        return res;
}

static void io_complete_rw(struct kiocb *kiocb, long res)
{
        struct io_rw *rw = container_of(kiocb, struct io_rw, kiocb);
        struct io_kiocb *req = cmd_to_io_kiocb(rw);

        if (__io_complete_rw_common(req, res))
                return;
        io_req_set_res(req, io_fixup_rw_res(req, res), 0);
        req->io_task_work.func = io_req_task_complete;
        io_req_task_work_add(req);
}

static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
{
        struct io_rw *rw = container_of(kiocb, struct io_rw, kiocb);
        struct io_kiocb *req = cmd_to_io_kiocb(rw);

        if (kiocb->ki_flags & IOCB_WRITE)
                kiocb_end_write(req);
        if (unlikely(res != req->cqe.res)) {
                if (res == -EAGAIN && io_rw_should_reissue(req)) {
                        req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO;
                        return;
                }
                req->cqe.res = res;
        }

        /* order with io_do_iopoll() checking ->iopoll_completed */
        smp_store_release(&req->iopoll_completed, 1);
}

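/*
 * Finish an inline (non-iopoll) issue attempt: propagate the file position
 * for REQ_F_CUR_POS requests, complete the request inline when possible,
 * otherwise route through io_rw_done(), and queue a reissue if the lower
 * layer flagged REQ_F_REISSUE.
 */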
static int kiocb_done(struct io_kiocb *req, ssize_t ret,
                      unsigned int issue_flags)
{
        struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
        unsigned final_ret = io_fixup_rw_res(req, ret);

        if (req->flags & REQ_F_CUR_POS)
                req->file->f_pos = rw->kiocb.ki_pos;
        if (ret >= 0 && (rw->kiocb.ki_complete == io_complete_rw)) {
                if (!__io_complete_rw_common(req, ret)) {
                        io_req_set_res(req, final_ret,
                                       io_put_kbuf(req, issue_flags));
                        return IOU_OK;
                }
        } else {
                io_rw_done(&rw->kiocb, ret);
        }

        if (req->flags & REQ_F_REISSUE) {
                req->flags &= ~REQ_F_REISSUE;
                if (io_resubmit_prep(req))
                        io_req_task_queue_reissue(req);
                else
                        io_req_task_queue_fail(req, final_ret);
        }
        return IOU_ISSUE_SKIP_COMPLETE;
}

#ifdef CONFIG_COMPAT
static ssize_t io_compat_import(struct io_kiocb *req, struct iovec *iov,
                                unsigned int issue_flags)
{
        struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
        struct compat_iovec __user *uiov;
        compat_ssize_t clen;
        void __user *buf;
        size_t len;

        uiov = u64_to_user_ptr(rw->addr);
        if (!access_ok(uiov, sizeof(*uiov)))
                return -EFAULT;
        if (__get_user(clen, &uiov->iov_len))
                return -EFAULT;
        if (clen < 0)
                return -EINVAL;

        len = clen;
        buf = io_buffer_select(req, &len, issue_flags);
        if (!buf)
                return -ENOBUFS;
        rw->addr = (unsigned long) buf;
        iov[0].iov_base = buf;
        rw->len = iov[0].iov_len = (compat_size_t) len;
        return 0;
}
#endif

static ssize_t __io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
                                      unsigned int issue_flags)
{
        struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
        struct iovec __user *uiov = u64_to_user_ptr(rw->addr);
        void __user *buf;
        ssize_t len;

        if (copy_from_user(iov, uiov, sizeof(*uiov)))
                return -EFAULT;

        len = iov[0].iov_len;
        if (len < 0)
                return -EINVAL;
        buf = io_buffer_select(req, &len, issue_flags);
        if (!buf)
                return -ENOBUFS;
        rw->addr = (unsigned long) buf;
        iov[0].iov_base = buf;
        rw->len = iov[0].iov_len = len;
        return 0;
}

static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
                                    unsigned int issue_flags)
{
        struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);

        if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)) {
                iov[0].iov_base = u64_to_user_ptr(rw->addr);
                iov[0].iov_len = rw->len;
                return 0;
        }
        if (rw->len != 1)
                return -EINVAL;

#ifdef CONFIG_COMPAT
        if (req->ctx->compat)
                return io_compat_import(req, iov, issue_flags);
#endif

        return __io_iov_buffer_select(req, iov, issue_flags);
}

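/*
 * Build the iov_iter for this request. Fixed read/write imports the
 * registered buffer, plain READ/WRITE maps a single user range (possibly a
 * selected provided buffer), and readv/writev imports the user iovec array,
 * allocating one if it doesn't fit in the inline fast_iov.
 */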
static struct iovec *__io_import_iovec(int ddir, struct io_kiocb *req,
                                       struct io_rw_state *s,
                                       unsigned int issue_flags)
{
        struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
        struct iov_iter *iter = &s->iter;
        u8 opcode = req->opcode;
        struct iovec *iovec;
        void __user *buf;
        size_t sqe_len;
        ssize_t ret;

        if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) {
                ret = io_import_fixed(ddir, iter, req->imu, rw->addr, rw->len);
                if (ret)
                        return ERR_PTR(ret);
                return NULL;
        }

        buf = u64_to_user_ptr(rw->addr);
        sqe_len = rw->len;

        if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) {
                if (io_do_buffer_select(req)) {
                        buf = io_buffer_select(req, &sqe_len, issue_flags);
                        if (!buf)
                                return ERR_PTR(-ENOBUFS);
                        rw->addr = (unsigned long) buf;
                        rw->len = sqe_len;
                }

                ret = import_single_range(ddir, buf, sqe_len, s->fast_iov, iter);
                if (ret)
                        return ERR_PTR(ret);
                return NULL;
        }

        iovec = s->fast_iov;
        if (req->flags & REQ_F_BUFFER_SELECT) {
                ret = io_iov_buffer_select(req, iovec, issue_flags);
                if (ret)
                        return ERR_PTR(ret);
                iov_iter_init(iter, ddir, iovec, 1, iovec->iov_len);
                return NULL;
        }

        ret = __import_iovec(ddir, buf, sqe_len, UIO_FASTIOV, &iovec, iter,
                             req->ctx->compat);
        if (unlikely(ret < 0))
                return ERR_PTR(ret);
        return iovec;
}

static inline int io_import_iovec(int rw, struct io_kiocb *req,
                                  struct iovec **iovec, struct io_rw_state *s,
                                  unsigned int issue_flags)
{
        *iovec = __io_import_iovec(rw, req, s, issue_flags);
        if (unlikely(IS_ERR(*iovec)))
                return PTR_ERR(*iovec);

        iov_iter_save_state(&s->iter, &s->iter_state);
        return 0;
}

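/* Stream files (pipes, sockets, etc.) have no position, so pass a NULL ppos. */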
static inline loff_t *io_kiocb_ppos(struct kiocb *kiocb)
{
        return (kiocb->ki_filp->f_mode & FMODE_STREAM) ? NULL : &kiocb->ki_pos;
}

/*
 * For files that don't have ->read_iter() and ->write_iter(), handle them
 * by looping over ->read() or ->write() manually.
 */
static ssize_t loop_rw_iter(int ddir, struct io_rw *rw, struct iov_iter *iter)
{
        struct kiocb *kiocb = &rw->kiocb;
        struct file *file = kiocb->ki_filp;
        ssize_t ret = 0;
        loff_t *ppos;

        /*
         * Don't support polled IO through this interface, and we can't
         * support non-blocking either. For the latter, this just causes
         * the kiocb to be handled from an async context.
         */
        if (kiocb->ki_flags & IOCB_HIPRI)
                return -EOPNOTSUPP;
        if ((kiocb->ki_flags & IOCB_NOWAIT) &&
            !(kiocb->ki_filp->f_flags & O_NONBLOCK))
                return -EAGAIN;

        ppos = io_kiocb_ppos(kiocb);

        while (iov_iter_count(iter)) {
                struct iovec iovec;
                ssize_t nr;

                if (!iov_iter_is_bvec(iter)) {
                        iovec = iov_iter_iovec(iter);
                } else {
                        iovec.iov_base = u64_to_user_ptr(rw->addr);
                        iovec.iov_len = rw->len;
                }

                if (ddir == READ) {
                        nr = file->f_op->read(file, iovec.iov_base,
                                              iovec.iov_len, ppos);
                } else {
                        nr = file->f_op->write(file, iovec.iov_base,
                                               iovec.iov_len, ppos);
                }

                if (nr < 0) {
                        if (!ret)
                                ret = nr;
                        break;
                }
                ret += nr;
                if (!iov_iter_is_bvec(iter)) {
                        iov_iter_advance(iter, nr);
                } else {
                        rw->addr += nr;
                        rw->len -= nr;
                        if (!rw->len)
                                break;
                }
                if (nr != iovec.iov_len)
                        break;
        }

        return ret;
}

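/*
 * Stash the iterator (and, for iovec-based requests, the iovec backing it)
 * in the request's async data so the I/O can be retried later from a
 * different context.
 */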
static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec,
                          const struct iovec *fast_iov, struct iov_iter *iter)
{
        struct io_async_rw *io = req->async_data;

        memcpy(&io->s.iter, iter, sizeof(*iter));
        io->free_iovec = iovec;
        io->bytes_done = 0;
        /* can only be fixed buffers, no need to do anything */
        if (iov_iter_is_bvec(iter))
                return;
        if (!iovec) {
                unsigned iov_off = 0;

                io->s.iter.iov = io->s.fast_iov;
                if (iter->iov != fast_iov) {
                        iov_off = iter->iov - fast_iov;
                        io->s.iter.iov += iov_off;
                }
                if (io->s.fast_iov != fast_iov)
                        memcpy(io->s.fast_iov + iov_off, fast_iov + iov_off,
                               sizeof(struct iovec) * iter->nr_segs);
        } else {
                req->flags |= REQ_F_NEED_CLEANUP;
        }
}

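/*
 * Allocate async data for the request (if not already present) and copy the
 * current iterator state into it, so the request can be punted to io-wq or
 * retried without losing the user's iovec.
 */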
static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
                             struct io_rw_state *s, bool force)
{
        if (!force && !io_op_defs[req->opcode].prep_async)
                return 0;
        if (!req_has_async_data(req)) {
                struct io_async_rw *iorw;

                if (io_alloc_async_data(req)) {
                        kfree(iovec);
                        return -ENOMEM;
                }

                io_req_map_rw(req, iovec, s->fast_iov, &s->iter);
                iorw = req->async_data;
                /* we've copied and mapped the iter, ensure state is saved */
                iov_iter_save_state(&iorw->s.iter, &iorw->s.iter_state);
        }
        return 0;
}

static inline int io_rw_prep_async(struct io_kiocb *req, int rw)
{
        struct io_async_rw *iorw = req->async_data;
        struct iovec *iov;
        int ret;

        /* submission path, ->uring_lock should already be taken */
        ret = io_import_iovec(rw, req, &iov, &iorw->s, 0);
        if (unlikely(ret < 0))
                return ret;

        iorw->bytes_done = 0;
        iorw->free_iovec = iov;
        if (iov)
                req->flags |= REQ_F_NEED_CLEANUP;
        return 0;
}

int io_readv_prep_async(struct io_kiocb *req)
{
        return io_rw_prep_async(req, READ);
}

int io_writev_prep_async(struct io_kiocb *req)
{
        return io_rw_prep_async(req, WRITE);
}

/*
 * This is our waitqueue callback handler, registered by the page cache when
 * a buffered read with IOCB_WAITQ set hits a locked folio. It gets called
 * when the folio is unlocked, which generally means the folio IO is done and
 * the buffered read can be retried. The callback removes itself from the
 * waitqueue and queues task_work to retry the operation in the original
 * task context.
 */
static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
                             int sync, void *arg)
{
        struct wait_page_queue *wpq;
        struct io_kiocb *req = wait->private;
        struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
        struct wait_page_key *key = arg;

        wpq = container_of(wait, struct wait_page_queue, wait);

        if (!wake_page_match(wpq, key))
                return 0;

        rw->kiocb.ki_flags &= ~IOCB_WAITQ;
        list_del_init(&wait->entry);
        io_req_task_queue(req);
        return 1;
}

/*
 * This controls whether a given IO request should be armed for async page
 * based retry. If we return false here, the request is handed to the async
 * worker threads for retry. If we're doing buffered reads on a regular file,
 * we prepare a private wait_page_queue entry and retry the operation. This
 * will either succeed because the page is now uptodate and unlocked, or it
 * will register a callback when the page is unlocked at IO completion.
 * Through that callback, io_uring uses task_work to set up a retry of the
 * operation. That retry will attempt the buffered read again, and will
 * generally succeed; in the rare cases where it fails, we fall back to the
 * async worker threads for a blocking retry.
 */
static bool io_rw_should_retry(struct io_kiocb *req)
{
        struct io_async_rw *io = req->async_data;
        struct wait_page_queue *wait = &io->wpq;
        struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
        struct kiocb *kiocb = &rw->kiocb;

        /* never retry for NOWAIT, we just complete with -EAGAIN */
        if (req->flags & REQ_F_NOWAIT)
                return false;

        /* Only for buffered IO */
        if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_HIPRI))
                return false;

        /*
         * just use poll if we can, and don't attempt if the fs doesn't
         * support callback based unlocks
         */
        if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC))
                return false;

        wait->wait.func = io_async_buf_func;
        wait->wait.private = req;
        wait->wait.flags = 0;
        INIT_LIST_HEAD(&wait->wait.entry);
        kiocb->ki_flags |= IOCB_WAITQ;
        kiocb->ki_flags &= ~IOCB_NOWAIT;
        kiocb->ki_waitq = wait;
        return true;
}

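/* Prefer ->read_iter() if the file has one, else fall back to looping ->read(). */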
static inline int io_iter_do_read(struct io_rw *rw, struct iov_iter *iter)
{
        struct file *file = rw->kiocb.ki_filp;

        if (likely(file->f_op->read_iter))
                return call_read_iter(file, &rw->kiocb, iter);
        else if (file->f_op->read)
                return loop_rw_iter(READ, rw, iter);
        else
                return -EINVAL;
}

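/*
 * Regular files and block devices are expected to transfer the full amount;
 * a short result on those should be completed via retry rather than being
 * returned to userspace as-is.
 */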
static bool need_complete_io(struct io_kiocb *req)
{
        return req->flags & REQ_F_ISREG ||
                S_ISBLK(file_inode(req->file)->i_mode);
}

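/*
 * Per-issue kiocb setup: check the file mode, apply the rwf flags from the
 * SQE, work out whether the request must be non-blocking, and pick the
 * completion handler (iopoll vs. regular).
 */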
static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
{
        struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
        struct kiocb *kiocb = &rw->kiocb;
        struct io_ring_ctx *ctx = req->ctx;
        struct file *file = req->file;
        int ret;

        if (unlikely(!file || !(file->f_mode & mode)))
                return -EBADF;

        if (!io_req_ffs_set(req))
                req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT;

        kiocb->ki_flags = file->f_iocb_flags;
        ret = kiocb_set_rw_flags(kiocb, rw->flags);
        if (unlikely(ret))
                return ret;

        /*
         * Mark the request NOWAIT if the kiocb asked for it, or if the file
         * is O_NONBLOCK but can't actually do nowait IO. Such requests fail
         * with -EAGAIN instead of being retried from a blocking context.
         */
        if ((kiocb->ki_flags & IOCB_NOWAIT) ||
            ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req)))
                req->flags |= REQ_F_NOWAIT;

        if (ctx->flags & IORING_SETUP_IOPOLL) {
                if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll)
                        return -EOPNOTSUPP;

                kiocb->private = NULL;
                kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE;
                kiocb->ki_complete = io_complete_rw_iopoll;
                req->iopoll_completed = 0;
        } else {
                if (kiocb->ki_flags & IOCB_HIPRI)
                        return -EINVAL;
                kiocb->ki_complete = io_complete_rw;
        }

        return 0;
}

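/*
 * Issue a read. On -EAGAIN from a non-blocking attempt the request is set up
 * for async retry; short buffered reads are retried inline with the page
 * waitqueue armed (see io_rw_should_retry()) so the full amount is returned
 * where possible.
 */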
int io_read(struct io_kiocb *req, unsigned int issue_flags)
{
        struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
        struct io_rw_state __s, *s = &__s;
        struct iovec *iovec;
        struct kiocb *kiocb = &rw->kiocb;
        bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
        struct io_async_rw *io;
        ssize_t ret, ret2;
        loff_t *ppos;

        if (!req_has_async_data(req)) {
                ret = io_import_iovec(READ, req, &iovec, s, issue_flags);
                if (unlikely(ret < 0))
                        return ret;
        } else {
                io = req->async_data;
                s = &io->s;

                /*
                 * Safe and required to re-import if we're using provided
                 * buffers, as we dropped the selected one before retry.
                 */
                if (io_do_buffer_select(req)) {
                        ret = io_import_iovec(READ, req, &iovec, s, issue_flags);
                        if (unlikely(ret < 0))
                                return ret;
                }

                /*
                 * We come here from an earlier attempt, restore our state to
                 * match in case it doesn't require a re-import.
                 */
                iov_iter_restore(&s->iter, &s->iter_state);
                iovec = NULL;
        }
        ret = io_rw_init_file(req, FMODE_READ);
        if (unlikely(ret)) {
                kfree(iovec);
                return ret;
        }
        req->cqe.res = iov_iter_count(&s->iter);

        if (force_nonblock) {
                /* If the file doesn't support async, just async punt */
                if (unlikely(!io_file_supports_nowait(req))) {
                        ret = io_setup_async_rw(req, iovec, s, true);
                        return ret ?: -EAGAIN;
                }
                kiocb->ki_flags |= IOCB_NOWAIT;
        } else {
                /* Ensure we clear previously set non-block flag */
                kiocb->ki_flags &= ~IOCB_NOWAIT;
        }

        ppos = io_kiocb_update_pos(req);

        ret = rw_verify_area(READ, req->file, ppos, req->cqe.res);
        if (unlikely(ret)) {
                kfree(iovec);
                return ret;
        }

        ret = io_iter_do_read(rw, &s->iter);

        if (ret == -EAGAIN || (req->flags & REQ_F_REISSUE)) {
                req->flags &= ~REQ_F_REISSUE;
                /* if we can poll, just do that */
                if (req->opcode == IORING_OP_READ && file_can_poll(req->file))
                        return -EAGAIN;
                /* IOPOLL retry should happen for io-wq threads */
                if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL))
                        goto done;
                /* no retry on NONBLOCK nor RWF_NOWAIT */
                if (req->flags & REQ_F_NOWAIT)
                        goto done;
                ret = 0;
        } else if (ret == -EIOCBQUEUED) {
                if (iovec)
                        kfree(iovec);
                return IOU_ISSUE_SKIP_COMPLETE;
        } else if (ret == req->cqe.res || ret <= 0 || !force_nonblock ||
                   (req->flags & REQ_F_NOWAIT) || !need_complete_io(req)) {
                /* read all, failed, already did sync, or don't want retry */
                goto done;
        }

        /*
         * Don't trust the iter state after a partial read; restore the state
         * that was saved before issuing so the retry below starts from a
         * known point.
         */
        iov_iter_restore(&s->iter, &s->iter_state);

        ret2 = io_setup_async_rw(req, iovec, s, true);
        if (ret2)
                return ret2;

        iovec = NULL;
        io = req->async_data;
        s = &io->s;
        /*
         * From here on use the persistent iterator and state stored in the
         * async data, as the on-stack copy won't survive a punt.
         */

        do {
                /*
                 * We end up here because of a partial read, either from
                 * above or inside this loop. Advance the iter by the bytes
                 * that were consumed.
                 */
                iov_iter_advance(&s->iter, ret);
                if (!iov_iter_count(&s->iter))
                        break;
                io->bytes_done += ret;
                iov_iter_save_state(&s->iter, &s->iter_state);

                /* if we can retry, do so with the callbacks armed */
                if (!io_rw_should_retry(req)) {
                        kiocb->ki_flags &= ~IOCB_WAITQ;
                        return -EAGAIN;
                }

                /*
                 * Now retry the read with IOCB_WAITQ set. If we get
                 * -EIOCBQUEUED, we'll be notified when the desired page gets
                 * unlocked. We can also get a partial read here, and if we
                 * do, then just retry at the new offset.
                 */
                ret = io_iter_do_read(rw, &s->iter);
                if (ret == -EIOCBQUEUED)
                        return IOU_ISSUE_SKIP_COMPLETE;
                /* we got some bytes, but not all. retry. */
                kiocb->ki_flags &= ~IOCB_WAITQ;
                iov_iter_restore(&s->iter, &s->iter_state);
        } while (ret > 0);
done:
        /* kfree(NULL) is a no-op, but checking avoids the call in the common case */
        if (iovec)
                kfree(iovec);
        return kiocb_done(req, ret, issue_flags);
}

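/*
 * Issue a write. Buffered writes to regular files that can't be done
 * non-blocking are punted to io-wq, and short writes to regular files or
 * block devices are completed from the async worker so the full result is
 * reported.
 */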
int io_write(struct io_kiocb *req, unsigned int issue_flags)
{
        struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
        struct io_rw_state __s, *s = &__s;
        struct iovec *iovec;
        struct kiocb *kiocb = &rw->kiocb;
        bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
        ssize_t ret, ret2;
        loff_t *ppos;

        if (!req_has_async_data(req)) {
                ret = io_import_iovec(WRITE, req, &iovec, s, issue_flags);
                if (unlikely(ret < 0))
                        return ret;
        } else {
                struct io_async_rw *io = req->async_data;

                s = &io->s;
                iov_iter_restore(&s->iter, &s->iter_state);
                iovec = NULL;
        }
        ret = io_rw_init_file(req, FMODE_WRITE);
        if (unlikely(ret)) {
                kfree(iovec);
                return ret;
        }
        req->cqe.res = iov_iter_count(&s->iter);

        if (force_nonblock) {
                /* If the file doesn't support async, just async punt */
                if (unlikely(!io_file_supports_nowait(req)))
                        goto copy_iov;

                /*
                 * Punt buffered writes to regular files unless the fs
                 * supports non-blocking buffered writes (FMODE_BUF_WASYNC)
                 * or this is O_DIRECT.
                 */
                if (!(kiocb->ki_flags & IOCB_DIRECT) &&
                    !(kiocb->ki_filp->f_mode & FMODE_BUF_WASYNC) &&
                    (req->flags & REQ_F_ISREG))
                        goto copy_iov;

                kiocb->ki_flags |= IOCB_NOWAIT;
        } else {
                /* Ensure we clear previously set non-block flag */
                kiocb->ki_flags &= ~IOCB_NOWAIT;
        }

        ppos = io_kiocb_update_pos(req);

        ret = rw_verify_area(WRITE, req->file, ppos, req->cqe.res);
        if (unlikely(ret)) {
                kfree(iovec);
                return ret;
        }

        /*
         * Open-code file_start_write here to grab freeze protection, which
         * will be released by another thread in io_complete_rw(). Fool
         * lockdep by telling it the lock got released so that it doesn't
         * complain about the held lock when we return to userspace.
         */
        if (req->flags & REQ_F_ISREG) {
                sb_start_write(file_inode(req->file)->i_sb);
                __sb_writers_release(file_inode(req->file)->i_sb,
                                     SB_FREEZE_WRITE);
        }
        kiocb->ki_flags |= IOCB_WRITE;

        if (likely(req->file->f_op->write_iter))
                ret2 = call_write_iter(req->file, kiocb, &s->iter);
        else if (req->file->f_op->write)
                ret2 = loop_rw_iter(WRITE, rw, &s->iter);
        else
                ret2 = -EINVAL;

        if (req->flags & REQ_F_REISSUE) {
                req->flags &= ~REQ_F_REISSUE;
                ret2 = -EAGAIN;
        }

        /*
         * Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just
         * retry them without IOCB_NOWAIT.
         */
        if (ret2 == -EOPNOTSUPP && (kiocb->ki_flags & IOCB_NOWAIT))
                ret2 = -EAGAIN;
        /* no retry on NONBLOCK nor RWF_NOWAIT */
        if (ret2 == -EAGAIN && (req->flags & REQ_F_NOWAIT))
                goto done;
        if (!force_nonblock || ret2 != -EAGAIN) {
                /* IOPOLL retry should happen for io-wq threads */
                if (ret2 == -EAGAIN && (req->ctx->flags & IORING_SETUP_IOPOLL))
                        goto copy_iov;

                if (ret2 != req->cqe.res && ret2 >= 0 && need_complete_io(req)) {
                        struct io_async_rw *io;

                        trace_io_uring_short_write(req->ctx, kiocb->ki_pos - ret2,
                                                   req->cqe.res, ret2);

                        /*
                         * This is a partial write. The file pos has already
                         * been updated, set up the async struct to complete
                         * the request in the worker. Also update bytes_done
                         * to account for the bytes already written.
                         */
                        iov_iter_save_state(&s->iter, &s->iter_state);
                        ret = io_setup_async_rw(req, iovec, s, true);

                        io = req->async_data;
                        if (io)
                                io->bytes_done += ret2;

                        if (kiocb->ki_flags & IOCB_WRITE)
                                kiocb_end_write(req);
                        return ret ? ret : -EAGAIN;
                }
done:
                ret = kiocb_done(req, ret2, issue_flags);
        } else {
copy_iov:
                iov_iter_restore(&s->iter, &s->iter_state);
                ret = io_setup_async_rw(req, iovec, s, false);
                if (!ret) {
                        if (kiocb->ki_flags & IOCB_WRITE)
                                kiocb_end_write(req);
                        return -EAGAIN;
                }
                return ret;
        }
        /* kfree(NULL) is a no-op, but checking avoids the call in the common case */
        if (iovec)
                kfree(iovec);
        return ret;
}

static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
{
        io_commit_cqring_flush(ctx);
        if (ctx->flags & IORING_SETUP_SQPOLL)
                io_cqring_wake(ctx);
}

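/*
 * Poll for completions on the IOPOLL list. Returns the number of completion
 * events posted, 0 if nothing completed, or a negative error from the
 * driver's ->iopoll() handler.
 */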
int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
{
        struct io_wq_work_node *pos, *start, *prev;
        unsigned int poll_flags = BLK_POLL_NOSLEEP;
        DEFINE_IO_COMP_BATCH(iob);
        int nr_events = 0;

        /*
         * Only spin for completions if we don't have multiple devices
         * hanging off our completion list.
         */
        if (ctx->poll_multi_queue || force_nonspin)
                poll_flags |= BLK_POLL_ONESHOT;

        wq_list_for_each(pos, start, &ctx->iopoll_list) {
                struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list);
                struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
                int ret;

                /*
                 * Move completed and retryable entries to our local lists.
                 * If we find a request that requires polling, break out
                 * and complete those lists first, if we have entries there.
                 */
                if (READ_ONCE(req->iopoll_completed))
                        break;

                ret = rw->kiocb.ki_filp->f_op->iopoll(&rw->kiocb, &iob, poll_flags);
                if (unlikely(ret < 0))
                        return ret;
                else if (ret)
                        poll_flags |= BLK_POLL_ONESHOT;

                /* iopoll may have completed current req */
                if (!rq_list_empty(iob.req_list) ||
                    READ_ONCE(req->iopoll_completed))
                        break;
        }

        if (!rq_list_empty(iob.req_list))
                iob.complete(&iob);
        else if (!pos)
                return 0;

        prev = start;
        wq_list_for_each_resume(pos, prev) {
                struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list);

                /* order with io_complete_rw_iopoll(), e.g. ->result updates */
                if (!smp_load_acquire(&req->iopoll_completed))
                        break;
                nr_events++;
                if (unlikely(req->flags & REQ_F_CQE_SKIP))
                        continue;

                req->cqe.flags = io_put_kbuf(req, 0);
                __io_fill_cqe_req(req->ctx, req);
        }

        if (unlikely(!nr_events))
                return 0;

        io_commit_cqring(ctx);
        io_cqring_ev_posted_iopoll(ctx);
        pos = start ? start->next : ctx->iopoll_list.first;
        wq_list_cut(&ctx->iopoll_list, prev, start);
        io_free_batch_list(ctx, pos);
        return nr_events;
}