#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/io_uring.h>

#include <trace/events/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "refs.h"
#include "cancel.h"
#include "timeout.h"

struct io_timeout {
	struct file			*file;
	u32				off;
	u32				target_seq;
	struct list_head		list;
	/* head of the link, used by linked timeouts only */
	struct io_kiocb			*head;
	/* for linked completions */
	struct io_kiocb			*prev;
};

struct io_timeout_rem {
	struct file			*file;
	u64				addr;

	/* timeout update */
	struct timespec64		ts;
	u32				flags;
	bool				ltimeout;
};

static inline bool io_is_timeout_noseq(struct io_kiocb *req)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);

	return !timeout->off;
}

static inline void io_put_req(struct io_kiocb *req)
{
	if (req_ref_put_and_test(req)) {
		io_queue_next(req);
		io_free_req(req);
	}
}

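/*
 * Try to cancel a queued timeout: this only succeeds if the hrtimer callback
 * isn't already running. On success the timeout is accounted, unlinked and
 * completed with @status via task work.
 */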
static bool io_kill_timeout(struct io_kiocb *req, int status)
	__must_hold(&req->ctx->completion_lock)
	__must_hold(&req->ctx->timeout_lock)
{
	struct io_timeout_data *io = req->async_data;

	if (hrtimer_try_to_cancel(&io->timer) != -1) {
		struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);

		if (status)
			req_set_fail(req);
		atomic_set(&req->ctx->cq_timeouts,
			atomic_read(&req->ctx->cq_timeouts) + 1);
		list_del_init(&timeout->list);
		io_req_tw_post_queue(req, status, 0);
		return true;
	}
	return false;
}

__cold void io_flush_timeouts(struct io_ring_ctx *ctx)
	__must_hold(&ctx->completion_lock)
{
	u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
	struct io_timeout *timeout, *tmp;

	spin_lock_irq(&ctx->timeout_lock);
	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
		struct io_kiocb *req = cmd_to_io_kiocb(timeout);
		u32 events_needed, events_got;

		if (io_is_timeout_noseq(req))
			break;

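		/*
		 * Since seq can easily wrap around over time, subtract
		 * the last seq at which timeouts were flushed before comparing.
		 * Assuming not more than 2^31-1 events have happened since,
		 * these subtractions won't have wrapped, so we can check if
		 * target is in [last_seq, current_seq] by comparing the two.
		 */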
		events_needed = timeout->target_seq - ctx->cq_last_tm_flush;
		events_got = seq - ctx->cq_last_tm_flush;
		if (events_got < events_needed)
			break;

		io_kill_timeout(req, 0);
	}
	ctx->cq_last_tm_flush = seq;
	spin_unlock_irq(&ctx->timeout_lock);
}

static void io_req_tw_fail_links(struct io_kiocb *link, bool *locked)
{
	io_tw_lock(link->ctx, locked);
	while (link) {
		struct io_kiocb *nxt = link->link;
		long res = -ECANCELED;

		if (link->flags & REQ_F_FAIL)
			res = link->cqe.res;
		link->link = NULL;
		io_req_set_res(link, res, 0);
		io_req_task_complete(link, locked);
		link = nxt;
	}
}

static void io_fail_links(struct io_kiocb *req)
	__must_hold(&req->ctx->completion_lock)
{
	struct io_kiocb *link = req->link;
	bool ignore_cqes = req->flags & REQ_F_SKIP_LINK_CQES;

	if (!link)
		return;

	while (link) {
		if (ignore_cqes)
			link->flags |= REQ_F_CQE_SKIP;
		else
			link->flags &= ~REQ_F_CQE_SKIP;
		trace_io_uring_fail_link(req, link);
		link = link->link;
	}

	link = req->link;
	link->io_task_work.func = io_req_tw_fail_links;
	io_req_task_work_add(link);
	req->link = NULL;
}

static inline void io_remove_next_linked(struct io_kiocb *req)
{
	struct io_kiocb *nxt = req->link;

	req->link = nxt->link;
	nxt->link = NULL;
}

bool io_disarm_next(struct io_kiocb *req)
	__must_hold(&req->ctx->completion_lock)
{
	struct io_kiocb *link = NULL;
	bool posted = false;

	if (req->flags & REQ_F_ARM_LTIMEOUT) {
		link = req->link;
		req->flags &= ~REQ_F_ARM_LTIMEOUT;
		if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
			io_remove_next_linked(req);
			io_req_tw_post_queue(link, -ECANCELED, 0);
			posted = true;
		}
	} else if (req->flags & REQ_F_LINK_TIMEOUT) {
		struct io_ring_ctx *ctx = req->ctx;

		spin_lock_irq(&ctx->timeout_lock);
		link = io_disarm_linked_timeout(req);
		spin_unlock_irq(&ctx->timeout_lock);
		if (link) {
			posted = true;
			io_req_tw_post_queue(link, -ECANCELED, 0);
		}
	}
	if (unlikely((req->flags & REQ_F_FAIL) &&
		     !(req->flags & REQ_F_HARDLINK))) {
		posted |= (req->link != NULL);
		io_fail_links(req);
	}
	return posted;
}

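/*
 * Detach the linked timeout hanging off @req. Returns the timeout request if
 * its timer could still be cancelled (so the caller can complete it), or NULL
 * if the timer has already fired.
 */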
struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req,
					    struct io_kiocb *link)
	__must_hold(&req->ctx->completion_lock)
	__must_hold(&req->ctx->timeout_lock)
{
	struct io_timeout_data *io = link->async_data;
	struct io_timeout *timeout = io_kiocb_to_cmd(link, struct io_timeout);

	io_remove_next_linked(req);
	timeout->head = NULL;
	if (hrtimer_try_to_cancel(&io->timer) != -1) {
		list_del(&timeout->list);
		return link;
	}

	return NULL;
}

static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
{
	struct io_timeout_data *data = container_of(timer,
						struct io_timeout_data, timer);
	struct io_kiocb *req = data->req;
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_ring_ctx *ctx = req->ctx;
	unsigned long flags;

	spin_lock_irqsave(&ctx->timeout_lock, flags);
	list_del_init(&timeout->list);
	atomic_set(&req->ctx->cq_timeouts,
		atomic_read(&req->ctx->cq_timeouts) + 1);
	spin_unlock_irqrestore(&ctx->timeout_lock, flags);

	if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS))
		req_set_fail(req);

	io_req_set_res(req, -ETIME, 0);
	req->io_task_work.func = io_req_task_complete;
	io_req_task_work_add(req);
	return HRTIMER_NORESTART;
}

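/*
 * Find a timeout matching the cancel criteria, stop its hrtimer and unlink it
 * from the timeout list. Returns -ENOENT if nothing matched and -EALREADY if
 * the timer is already running its callback.
 */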
static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
					   struct io_cancel_data *cd)
	__must_hold(&ctx->timeout_lock)
{
	struct io_timeout *timeout;
	struct io_timeout_data *io;
	struct io_kiocb *req = NULL;

	list_for_each_entry(timeout, &ctx->timeout_list, list) {
		struct io_kiocb *tmp = cmd_to_io_kiocb(timeout);

		if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
		    cd->data != tmp->cqe.user_data)
			continue;
		if (cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY)) {
			if (cd->seq == tmp->work.cancel_seq)
				continue;
			tmp->work.cancel_seq = cd->seq;
		}
		req = tmp;
		break;
	}
	if (!req)
		return ERR_PTR(-ENOENT);

	io = req->async_data;
	if (hrtimer_try_to_cancel(&io->timer) == -1)
		return ERR_PTR(-EALREADY);
	timeout = io_kiocb_to_cmd(req, struct io_timeout);
	list_del_init(&timeout->list);
	return req;
}

int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
	__must_hold(&ctx->completion_lock)
{
	struct io_kiocb *req;

	spin_lock_irq(&ctx->timeout_lock);
	req = io_timeout_extract(ctx, cd);
	spin_unlock_irq(&ctx->timeout_lock);

	if (IS_ERR(req))
		return PTR_ERR(req);
	io_req_task_queue_fail(req, -ECANCELED);
	return 0;
}

static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
{
	unsigned issue_flags = *locked ? 0 : IO_URING_F_UNLOCKED;
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_kiocb *prev = timeout->prev;
	int ret = -ENOENT;

	if (prev) {
		if (!(req->task->flags & PF_EXITING)) {
			struct io_cancel_data cd = {
				.ctx		= req->ctx,
				.data		= prev->cqe.user_data,
			};

			ret = io_try_cancel(req->task->io_uring, &cd, issue_flags);
		}
		io_req_set_res(req, ret ?: -ETIME, 0);
		io_req_complete_post(req);
		io_put_req(prev);
	} else {
		io_req_set_res(req, -ETIME, 0);
		io_req_complete_post(req);
	}
}

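/*
 * hrtimer callback for linked timeouts: take a reference on the request this
 * timeout is linked to, if it's still around, and punt the actual
 * cancellation to task work.
 */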
static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
{
	struct io_timeout_data *data = container_of(timer,
						struct io_timeout_data, timer);
	struct io_kiocb *prev, *req = data->req;
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_ring_ctx *ctx = req->ctx;
	unsigned long flags;

	spin_lock_irqsave(&ctx->timeout_lock, flags);
	prev = timeout->head;
	timeout->head = NULL;

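	/*
	 * We don't expect the list to be empty, that will only happen if we
	 * race with the completion of the linked work.
	 */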
	if (prev) {
		io_remove_next_linked(prev);
		if (!req_ref_inc_not_zero(prev))
			prev = NULL;
	}
	list_del(&timeout->list);
	timeout->prev = prev;
	spin_unlock_irqrestore(&ctx->timeout_lock, flags);

	req->io_task_work.func = io_req_task_link_timeout;
	io_req_task_work_add(req);
	return HRTIMER_NORESTART;
}

static clockid_t io_timeout_get_clock(struct io_timeout_data *data)
{
	switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) {
	case IORING_TIMEOUT_BOOTTIME:
		return CLOCK_BOOTTIME;
	case IORING_TIMEOUT_REALTIME:
		return CLOCK_REALTIME;
	default:
		/* can't happen, vetted at prep time */
		WARN_ON_ONCE(1);
		fallthrough;
	case 0:
		return CLOCK_MONOTONIC;
	}
}

static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
				    struct timespec64 *ts, enum hrtimer_mode mode)
	__must_hold(&ctx->timeout_lock)
{
	struct io_timeout_data *io;
	struct io_timeout *timeout;
	struct io_kiocb *req = NULL;

	list_for_each_entry(timeout, &ctx->ltimeout_list, list) {
		struct io_kiocb *tmp = cmd_to_io_kiocb(timeout);

		if (user_data == tmp->cqe.user_data) {
			req = tmp;
			break;
		}
	}
	if (!req)
		return -ENOENT;

	io = req->async_data;
	if (hrtimer_try_to_cancel(&io->timer) == -1)
		return -EALREADY;
	hrtimer_init(&io->timer, io_timeout_get_clock(io), mode);
	io->timer.function = io_link_timeout_fn;
	hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode);
	return 0;
}

static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
			     struct timespec64 *ts, enum hrtimer_mode mode)
	__must_hold(&ctx->timeout_lock)
{
	struct io_cancel_data cd = { .data = user_data, };
	struct io_kiocb *req = io_timeout_extract(ctx, &cd);
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_timeout_data *data;

	if (IS_ERR(req))
		return PTR_ERR(req);

	timeout->off = 0; /* noseq */
	data = req->async_data;
	list_add_tail(&timeout->list, &ctx->timeout_list);
	hrtimer_init(&data->timer, io_timeout_get_clock(data), mode);
	data->timer.function = io_timeout_fn;
	hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode);
	return 0;
}

int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_timeout_rem *tr = io_kiocb_to_cmd(req, struct io_timeout_rem);

	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
		return -EINVAL;
	if (sqe->buf_index || sqe->len || sqe->splice_fd_in)
		return -EINVAL;

	tr->ltimeout = false;
	tr->addr = READ_ONCE(sqe->addr);
	tr->flags = READ_ONCE(sqe->timeout_flags);
	if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) {
		if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
			return -EINVAL;
		if (tr->flags & IORING_LINK_TIMEOUT_UPDATE)
			tr->ltimeout = true;
		if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS))
			return -EINVAL;
		if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2)))
			return -EFAULT;
		if (tr->ts.tv_sec < 0 || tr->ts.tv_nsec < 0)
			return -EINVAL;
	} else if (tr->flags) {
		/* timeout removal doesn't support flags */
		return -EINVAL;
	}

	return 0;
}

static inline enum hrtimer_mode io_translate_timeout_mode(unsigned int flags)
{
	return (flags & IORING_TIMEOUT_ABS) ? HRTIMER_MODE_ABS
					    : HRTIMER_MODE_REL;
}

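/*
 * Remove or update an existing timeout command
 */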
int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_timeout_rem *tr = io_kiocb_to_cmd(req, struct io_timeout_rem);
	struct io_ring_ctx *ctx = req->ctx;
	int ret;

	if (!(tr->flags & IORING_TIMEOUT_UPDATE)) {
		struct io_cancel_data cd = { .data = tr->addr, };

		spin_lock(&ctx->completion_lock);
		ret = io_timeout_cancel(ctx, &cd);
		spin_unlock(&ctx->completion_lock);
	} else {
		enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags);

		spin_lock_irq(&ctx->timeout_lock);
		if (tr->ltimeout)
			ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode);
		else
			ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode);
		spin_unlock_irq(&ctx->timeout_lock);
	}

	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

static int __io_timeout_prep(struct io_kiocb *req,
			     const struct io_uring_sqe *sqe,
			     bool is_timeout_link)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_timeout_data *data;
	unsigned flags;
	u32 off = READ_ONCE(sqe->off);

	if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in)
		return -EINVAL;
	if (off && is_timeout_link)
		return -EINVAL;
	flags = READ_ONCE(sqe->timeout_flags);
	if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK |
		      IORING_TIMEOUT_ETIME_SUCCESS))
		return -EINVAL;
	/* more than one clock specified is invalid, obviously */
	if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
		return -EINVAL;

	INIT_LIST_HEAD(&timeout->list);
	timeout->off = off;
	if (unlikely(off && !req->ctx->off_timeout_used))
		req->ctx->off_timeout_used = true;

	if (WARN_ON_ONCE(req_has_async_data(req)))
		return -EFAULT;
	if (io_alloc_async_data(req))
		return -ENOMEM;

	data = req->async_data;
	data->req = req;
	data->flags = flags;

	if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr)))
		return -EFAULT;

	if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0)
		return -EINVAL;

	INIT_LIST_HEAD(&timeout->list);
	data->mode = io_translate_timeout_mode(flags);
	hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode);

	if (is_timeout_link) {
		struct io_submit_link *link = &req->ctx->submit_state.link;

		if (!link->head)
			return -EINVAL;
		if (link->last->opcode == IORING_OP_LINK_TIMEOUT)
			return -EINVAL;
		timeout->head = link->last;
		link->last->flags |= REQ_F_ARM_LTIMEOUT;
	}
	return 0;
}

int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	return __io_timeout_prep(req, sqe, false);
}

int io_link_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	return __io_timeout_prep(req, sqe, true);
}

int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_timeout_data *data = req->async_data;
	struct list_head *entry;
	u32 tail, off = timeout->off;

	spin_lock_irq(&ctx->timeout_lock);

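	/*
	 * sqe->off holds how many events that need to occur for this
	 * timeout event to be satisfied. If it isn't set, then this is
	 * a pure timeout request, sequence isn't used.
	 */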
	if (io_is_timeout_noseq(req)) {
		entry = ctx->timeout_list.prev;
		goto add;
	}

	tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
	timeout->target_seq = tail + off;

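	/* Update the last seq here in case io_flush_timeouts() hasn't.
	 * This is safe because ->completion_lock is held, and submissions
	 * and completions are never mixed in the same ->completion_lock section.
	 */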
	ctx->cq_last_tm_flush = tail;

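	/*
	 * Insertion sort, ensuring the first entry in the list is always
	 * the one we need first.
	 */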
	list_for_each_prev(entry, &ctx->timeout_list) {
		struct io_timeout *nextt = list_entry(entry, struct io_timeout, list);
		struct io_kiocb *nxt = cmd_to_io_kiocb(nextt);

		if (io_is_timeout_noseq(nxt))
			continue;
		/* nxt.seq is behind @tail, otherwise would've been completed */
		if (off >= nextt->target_seq - tail)
			break;
	}
add:
	list_add(&timeout->list, entry);
	data->timer.function = io_timeout_fn;
	hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
	spin_unlock_irq(&ctx->timeout_lock);
	return IOU_ISSUE_SKIP_COMPLETE;
}

void io_queue_linked_timeout(struct io_kiocb *req)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_ring_ctx *ctx = req->ctx;

	spin_lock_irq(&ctx->timeout_lock);
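	/*
	 * If the back reference is NULL, then our linked request finished
	 * before we got a chance to setup the timer
	 */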
	if (timeout->head) {
		struct io_timeout_data *data = req->async_data;

		data->timer.function = io_link_timeout_fn;
		hrtimer_start(&data->timer, timespec64_to_ktime(data->ts),
				data->mode);
		list_add_tail(&timeout->list, &ctx->ltimeout_list);
	}
	spin_unlock_irq(&ctx->timeout_lock);

	io_put_req(req);
}

static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
			  bool cancel_all)
	__must_hold(&req->ctx->timeout_lock)
{
	struct io_kiocb *req;

	if (task && head->task != task)
		return false;
	if (cancel_all)
		return true;

	io_for_each_link(req, head) {
		if (req->flags & REQ_F_INFLIGHT)
			return true;
	}
	return false;
}

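/* Returns true if we found and killed one or more timeouts */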
__cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
			     bool cancel_all)
{
	struct io_timeout *timeout, *tmp;
	int canceled = 0;

	io_cq_lock(ctx);
	spin_lock_irq(&ctx->timeout_lock);
	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
		struct io_kiocb *req = cmd_to_io_kiocb(timeout);

		if (io_match_task(req, tsk, cancel_all) &&
		    io_kill_timeout(req, -ECANCELED))
			canceled++;
	}
	spin_unlock_irq(&ctx->timeout_lock);
	io_cq_unlock_post(ctx);
	return canceled != 0;
}