// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/io_uring.h>

#include <trace/events/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "refs.h"
#include "cancel.h"
#include "timeout.h"
struct io_timeout {
	struct file			*file;
	u32				off;
	u32				target_seq;
	u32				repeats;
	struct list_head		list;
	/* head of the link, used by linked timeouts only */
	struct io_kiocb			*head;
	/* for linked completions */
	struct io_kiocb			*prev;
};

struct io_timeout_rem {
	struct file			*file;
	u64				addr;

	/* timeout update */
	struct timespec64		ts;
	u32				flags;
	bool				ltimeout;
};
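/*
 * A timeout is "noseq" when it is not tied to a CQE sequence: either it has
 * no completion-count offset (a pure timer), or it is a multishot timeout.
 * Such entries sit at the tail of ->timeout_list and are never expired by
 * io_flush_timeouts(); they only fire from their hrtimer.
 */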
static inline bool io_is_timeout_noseq(struct io_kiocb *req)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_timeout_data *data = req->async_data;

	return !timeout->off || data->flags & IORING_TIMEOUT_MULTISHOT;
}
static inline void io_put_req(struct io_kiocb *req)
{
	if (req_ref_put_and_test(req)) {
		io_queue_next(req);
		io_free_req(req);
	}
}
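/*
 * Decide whether a firing timeout is done or should be re-armed. Non-multishot
 * timeouts always finish. A multishot timeout keeps re-arming while it still
 * has repeats left, or indefinitely when no repeat count was given (off == 0).
 */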
static inline bool io_timeout_finish(struct io_timeout *timeout,
				     struct io_timeout_data *data)
{
	if (!(data->flags & IORING_TIMEOUT_MULTISHOT))
		return true;

	if (!timeout->off || (timeout->repeats && --timeout->repeats))
		return false;

	return true;
}
static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer);
static void io_timeout_complete(struct io_kiocb *req, struct io_tw_state *ts)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_timeout_data *data = req->async_data;
	struct io_ring_ctx *ctx = req->ctx;

	if (!io_timeout_finish(timeout, data)) {
		bool filled;

		filled = io_fill_cqe_req_aux(req, ts->locked, -ETIME,
					     IORING_CQE_F_MORE);
		if (filled) {
			/* re-arm timer */
			spin_lock_irq(&ctx->timeout_lock);
			list_add(&timeout->list, ctx->timeout_list.prev);
			data->timer.function = io_timeout_fn;
			hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
			spin_unlock_irq(&ctx->timeout_lock);
			return;
		}
	}

	io_req_task_complete(req, ts);
}
static bool io_kill_timeout(struct io_kiocb *req, int status)
	__must_hold(&req->ctx->timeout_lock)
{
	struct io_timeout_data *io = req->async_data;

	if (hrtimer_try_to_cancel(&io->timer) != -1) {
		struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);

		if (status)
			req_set_fail(req);
		atomic_set(&req->ctx->cq_timeouts,
			atomic_read(&req->ctx->cq_timeouts) + 1);
		list_del_init(&timeout->list);
		io_req_queue_tw_complete(req, status);
		return true;
	}
	return false;
}
__cold void io_flush_timeouts(struct io_ring_ctx *ctx)
{
	u32 seq;
	struct io_timeout *timeout, *tmp;

	spin_lock_irq(&ctx->timeout_lock);
	seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);

	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
		struct io_kiocb *req = cmd_to_io_kiocb(timeout);
		u32 events_needed, events_got;

		if (io_is_timeout_noseq(req))
			break;

		/*
		 * Since seq can easily wrap around over time, subtract
		 * the last seq at which timeouts were flushed before comparing.
		 * Assuming not more than 2^31-1 events have happened since,
		 * these subtractions won't have wrapped, so we can check if
		 * target is in [last_seq, current_seq] by comparing the two.
		 */
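		/*
		 * Worked example (for illustration): with cq_last_tm_flush ==
		 * 0xfffffff0, a timeout armed for target_seq == 0x10 and a
		 * current seq == 0x8 gives events_needed == 0x20 and
		 * events_got == 0x18, so events_got < events_needed and the
		 * timeout correctly stays armed even though seq has wrapped.
		 */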
		events_needed = timeout->target_seq - ctx->cq_last_tm_flush;
		events_got = seq - ctx->cq_last_tm_flush;
		if (events_got < events_needed)
			break;

		io_kill_timeout(req, 0);
	}
	ctx->cq_last_tm_flush = seq;
	spin_unlock_irq(&ctx->timeout_lock);
}
static void io_req_tw_fail_links(struct io_kiocb *link, struct io_tw_state *ts)
{
	io_tw_lock(link->ctx, ts);
	while (link) {
		struct io_kiocb *nxt = link->link;
		long res = -ECANCELED;

		if (link->flags & REQ_F_FAIL)
			res = link->cqe.res;
		link->link = NULL;
		io_req_set_res(link, res, 0);
		io_req_task_complete(link, ts);
		link = nxt;
	}
}
static void io_fail_links(struct io_kiocb *req)
	__must_hold(&req->ctx->completion_lock)
{
	struct io_kiocb *link = req->link;
	bool ignore_cqes = req->flags & REQ_F_SKIP_LINK_CQES;

	if (!link)
		return;

	while (link) {
		if (ignore_cqes)
			link->flags |= REQ_F_CQE_SKIP;
		else
			link->flags &= ~REQ_F_CQE_SKIP;
		trace_io_uring_fail_link(req, link);
		link = link->link;
	}

	link = req->link;
	link->io_task_work.func = io_req_tw_fail_links;
	io_req_task_work_add(link);
	req->link = NULL;
}
static inline void io_remove_next_linked(struct io_kiocb *req)
{
	struct io_kiocb *nxt = req->link;

	req->link = nxt->link;
	nxt->link = NULL;
}
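/*
 * Disarm the linked timeout attached to @req, if any. A timeout that has not
 * been armed yet (REQ_F_ARM_LTIMEOUT) is simply completed with -ECANCELED;
 * an armed one (REQ_F_LINK_TIMEOUT) must first be removed under
 * ->timeout_lock. If the request failed and is not a hard link, the rest of
 * the link chain is failed as well.
 */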
void io_disarm_next(struct io_kiocb *req)
	__must_hold(&req->ctx->completion_lock)
{
	struct io_kiocb *link = NULL;

	if (req->flags & REQ_F_ARM_LTIMEOUT) {
		link = req->link;
		req->flags &= ~REQ_F_ARM_LTIMEOUT;
		if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
			io_remove_next_linked(req);
			io_req_queue_tw_complete(link, -ECANCELED);
		}
	} else if (req->flags & REQ_F_LINK_TIMEOUT) {
		struct io_ring_ctx *ctx = req->ctx;

		spin_lock_irq(&ctx->timeout_lock);
		link = io_disarm_linked_timeout(req);
		spin_unlock_irq(&ctx->timeout_lock);
		if (link)
			io_req_queue_tw_complete(link, -ECANCELED);
	}
	if (unlikely((req->flags & REQ_F_FAIL) &&
		     !(req->flags & REQ_F_HARDLINK)))
		io_fail_links(req);
}
struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req,
					    struct io_kiocb *link)
	__must_hold(&req->ctx->completion_lock)
	__must_hold(&req->ctx->timeout_lock)
{
	struct io_timeout_data *io = link->async_data;
	struct io_timeout *timeout = io_kiocb_to_cmd(link, struct io_timeout);

	io_remove_next_linked(req);
	timeout->head = NULL;
	if (hrtimer_try_to_cancel(&io->timer) != -1) {
		list_del(&timeout->list);
		return link;
	}

	return NULL;
}
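/*
 * hrtimer callback for a normal timeout. It runs in hard-irq context, so it
 * only unlinks the request under ->timeout_lock and punts completion to task
 * work. The CQE result is -ETIME; the request is only marked failed when
 * IORING_TIMEOUT_ETIME_SUCCESS is not set.
 */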
static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
{
	struct io_timeout_data *data = container_of(timer,
						struct io_timeout_data, timer);
	struct io_kiocb *req = data->req;
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_ring_ctx *ctx = req->ctx;
	unsigned long flags;

	spin_lock_irqsave(&ctx->timeout_lock, flags);
	list_del_init(&timeout->list);
	atomic_set(&req->ctx->cq_timeouts,
		atomic_read(&req->ctx->cq_timeouts) + 1);
	spin_unlock_irqrestore(&ctx->timeout_lock, flags);

	if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS))
		req_set_fail(req);

	io_req_set_res(req, -ETIME, 0);
	req->io_task_work.func = io_timeout_complete;
	io_req_task_work_add(req);
	return HRTIMER_NORESTART;
}
static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
					   struct io_cancel_data *cd)
	__must_hold(&ctx->timeout_lock)
{
	struct io_timeout *timeout;
	struct io_timeout_data *io;
	struct io_kiocb *req = NULL;

	list_for_each_entry(timeout, &ctx->timeout_list, list) {
		struct io_kiocb *tmp = cmd_to_io_kiocb(timeout);

		if (io_cancel_req_match(tmp, cd)) {
			req = tmp;
			break;
		}
	}
	if (!req)
		return ERR_PTR(-ENOENT);

	io = req->async_data;
	if (hrtimer_try_to_cancel(&io->timer) == -1)
		return ERR_PTR(-EALREADY);
	timeout = io_kiocb_to_cmd(req, struct io_timeout);
	list_del_init(&timeout->list);
	return req;
}
int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
	__must_hold(&ctx->completion_lock)
{
	struct io_kiocb *req;

	spin_lock_irq(&ctx->timeout_lock);
	req = io_timeout_extract(ctx, cd);
	spin_unlock_irq(&ctx->timeout_lock);

	if (IS_ERR(req))
		return PTR_ERR(req);
	io_req_task_queue_fail(req, -ECANCELED);
	return 0;
}
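/*
 * Task-work handler run after a linked timeout has fired: try to cancel the
 * request the timeout was linked to (timeout->prev) and complete the timeout
 * with the cancellation error, or with -ETIME when the cancel succeeded or
 * there was nothing left to cancel.
 */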
static void io_req_task_link_timeout(struct io_kiocb *req, struct io_tw_state *ts)
{
	unsigned issue_flags = ts->locked ? 0 : IO_URING_F_UNLOCKED;
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_kiocb *prev = timeout->prev;
	int ret = -ENOENT;

	if (prev) {
		if (!(req->task->flags & PF_EXITING)) {
			struct io_cancel_data cd = {
				.ctx		= req->ctx,
				.data		= prev->cqe.user_data,
			};

			ret = io_try_cancel(req->task->io_uring, &cd, issue_flags);
		}
		io_req_set_res(req, ret ?: -ETIME, 0);
		io_req_task_complete(req, ts);
		io_put_req(prev);
	} else {
		io_req_set_res(req, -ETIME, 0);
		io_req_task_complete(req, ts);
	}
}
static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
{
	struct io_timeout_data *data = container_of(timer,
						struct io_timeout_data, timer);
	struct io_kiocb *prev, *req = data->req;
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_ring_ctx *ctx = req->ctx;
	unsigned long flags;

	spin_lock_irqsave(&ctx->timeout_lock, flags);
	prev = timeout->head;
	timeout->head = NULL;

	/*
	 * We don't expect the list to be empty; that will only happen if we
	 * race with the completion of the linked work.
	 */
	if (prev) {
		io_remove_next_linked(prev);
		if (!req_ref_inc_not_zero(prev))
			prev = NULL;
	}
	list_del(&timeout->list);
	timeout->prev = prev;
	spin_unlock_irqrestore(&ctx->timeout_lock, flags);

	req->io_task_work.func = io_req_task_link_timeout;
	io_req_task_work_add(req);
	return HRTIMER_NORESTART;
}
static clockid_t io_timeout_get_clock(struct io_timeout_data *data)
{
	switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) {
	case IORING_TIMEOUT_BOOTTIME:
		return CLOCK_BOOTTIME;
	case IORING_TIMEOUT_REALTIME:
		return CLOCK_REALTIME;
	default:
		/* can't happen, vetted at prep time */
		WARN_ON_ONCE(1);
		fallthrough;
	case 0:
		return CLOCK_MONOTONIC;
	}
}
static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
				    struct timespec64 *ts, enum hrtimer_mode mode)
	__must_hold(&ctx->timeout_lock)
{
	struct io_timeout_data *io;
	struct io_timeout *timeout;
	struct io_kiocb *req = NULL;

	list_for_each_entry(timeout, &ctx->ltimeout_list, list) {
		struct io_kiocb *tmp = cmd_to_io_kiocb(timeout);

		if (user_data == tmp->cqe.user_data) {
			req = tmp;
			break;
		}
	}
	if (!req)
		return -ENOENT;

	io = req->async_data;
	if (hrtimer_try_to_cancel(&io->timer) == -1)
		return -EALREADY;
	hrtimer_init(&io->timer, io_timeout_get_clock(io), mode);
	io->timer.function = io_link_timeout_fn;
	hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode);
	return 0;
}
static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
			     struct timespec64 *ts, enum hrtimer_mode mode)
	__must_hold(&ctx->timeout_lock)
{
	struct io_cancel_data cd = { .ctx = ctx, .data = user_data, };
	struct io_kiocb *req = io_timeout_extract(ctx, &cd);
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_timeout_data *data;

	if (IS_ERR(req))
		return PTR_ERR(req);

	timeout->off = 0; /* noseq */
	data = req->async_data;
	list_add_tail(&timeout->list, &ctx->timeout_list);
	hrtimer_init(&data->timer, io_timeout_get_clock(data), mode);
	data->timer.function = io_timeout_fn;
	hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode);
	return 0;
}
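/*
 * Prep for IORING_OP_TIMEOUT_REMOVE: sqe->addr carries the user_data of the
 * timeout to remove or update, sqe->timeout_flags selects plain removal or an
 * update (optionally of a linked timeout), and for updates sqe->addr2 points
 * at the new struct timespec64.
 */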
int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_timeout_rem *tr = io_kiocb_to_cmd(req, struct io_timeout_rem);

	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
		return -EINVAL;
	if (sqe->buf_index || sqe->len || sqe->splice_fd_in)
		return -EINVAL;

	tr->ltimeout = false;
	tr->addr = READ_ONCE(sqe->addr);
	tr->flags = READ_ONCE(sqe->timeout_flags);
	if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) {
		if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
			return -EINVAL;
		if (tr->flags & IORING_LINK_TIMEOUT_UPDATE)
			tr->ltimeout = true;
		if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS))
			return -EINVAL;
		if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2)))
			return -EFAULT;
		if (tr->ts.tv_sec < 0 || tr->ts.tv_nsec < 0)
			return -EINVAL;
	} else if (tr->flags) {
		/* timeout removal doesn't support flags */
		return -EINVAL;
	}

	return 0;
}
static inline enum hrtimer_mode io_translate_timeout_mode(unsigned int flags)
{
	return (flags & IORING_TIMEOUT_ABS) ? HRTIMER_MODE_ABS
					    : HRTIMER_MODE_REL;
}
/*
 * Remove or update an existing timeout command
 */
int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_timeout_rem *tr = io_kiocb_to_cmd(req, struct io_timeout_rem);
	struct io_ring_ctx *ctx = req->ctx;
	int ret;

	if (!(tr->flags & IORING_TIMEOUT_UPDATE)) {
		struct io_cancel_data cd = { .ctx = ctx, .data = tr->addr, };

		spin_lock(&ctx->completion_lock);
		ret = io_timeout_cancel(ctx, &cd);
		spin_unlock(&ctx->completion_lock);
	} else {
		enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags);

		spin_lock_irq(&ctx->timeout_lock);
		if (tr->ltimeout)
			ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode);
		else
			ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode);
		spin_unlock_irq(&ctx->timeout_lock);
	}

	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
static int __io_timeout_prep(struct io_kiocb *req,
			     const struct io_uring_sqe *sqe,
			     bool is_timeout_link)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_timeout_data *data;
	unsigned flags;
	u32 off = READ_ONCE(sqe->off);

	if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in)
		return -EINVAL;
	if (off && is_timeout_link)
		return -EINVAL;
	flags = READ_ONCE(sqe->timeout_flags);
	if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK |
		      IORING_TIMEOUT_ETIME_SUCCESS |
		      IORING_TIMEOUT_MULTISHOT))
		return -EINVAL;
	/* more than one clock specified is invalid, obviously */
	if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
		return -EINVAL;
	/* multishot requests only make sense with rel values */
	if (!(~flags & (IORING_TIMEOUT_MULTISHOT | IORING_TIMEOUT_ABS)))
		return -EINVAL;
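	/*
	 * Note on the check above: ~flags has a bit clear only where flags
	 * has it set, so the condition is true, and we reject with -EINVAL,
	 * exactly when both IORING_TIMEOUT_MULTISHOT and IORING_TIMEOUT_ABS
	 * are set at the same time.
	 */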
	INIT_LIST_HEAD(&timeout->list);
	timeout->off = off;
	if (unlikely(off && !req->ctx->off_timeout_used))
		req->ctx->off_timeout_used = true;
	/*
	 * for multishot reqs w/ fixed nr of repeats, repeats tracks the
	 * remaining nr
	 */
	timeout->repeats = 0;
	if ((flags & IORING_TIMEOUT_MULTISHOT) && off > 0)
		timeout->repeats = off;

	if (WARN_ON_ONCE(req_has_async_data(req)))
		return -EFAULT;
	if (io_alloc_async_data(req))
		return -ENOMEM;

	data = req->async_data;
	data->req = req;
	data->flags = flags;

	if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr)))
		return -EFAULT;

	if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0)
		return -EINVAL;

	INIT_LIST_HEAD(&timeout->list);
	data->mode = io_translate_timeout_mode(flags);
	hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode);

	if (is_timeout_link) {
		struct io_submit_link *link = &req->ctx->submit_state.link;

		if (!link->head)
			return -EINVAL;
		if (link->last->opcode == IORING_OP_LINK_TIMEOUT)
			return -EINVAL;
		timeout->head = link->last;
		link->last->flags |= REQ_F_ARM_LTIMEOUT;
	}
	return 0;
}
int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	return __io_timeout_prep(req, sqe, false);
}

int io_link_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	return __io_timeout_prep(req, sqe, true);
}
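/*
 * Arm a timeout request: compute its target CQ sequence (unless it is a
 * "noseq" timeout), insert it into ->timeout_list sorted by expiry sequence,
 * and start the hrtimer. The request completes later from the timer callback,
 * hence IOU_ISSUE_SKIP_COMPLETE.
 */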
int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_timeout_data *data = req->async_data;
	struct list_head *entry;
	u32 tail, off = timeout->off;

	spin_lock_irq(&ctx->timeout_lock);

	/*
	 * sqe->off holds how many events need to occur for this timeout
	 * event to be satisfied. If it isn't set, then this is a pure
	 * timeout request, sequence isn't used.
	 */
	if (io_is_timeout_noseq(req)) {
		entry = ctx->timeout_list.prev;
		goto add;
	}

	tail = data_race(ctx->cached_cq_tail) - atomic_read(&ctx->cq_timeouts);
	timeout->target_seq = tail + off;

	/* Update the last seq here in case io_flush_timeouts() hasn't.
	 * This is safe because ->completion_lock is held, and submissions
	 * and completions are never mixed in the same ->completion_lock section.
	 */
	ctx->cq_last_tm_flush = tail;

	/*
	 * Insertion sort, ensuring the first entry in the list is always
	 * the one we need first.
	 */
	list_for_each_prev(entry, &ctx->timeout_list) {
		struct io_timeout *nextt = list_entry(entry, struct io_timeout, list);
		struct io_kiocb *nxt = cmd_to_io_kiocb(nextt);

		if (io_is_timeout_noseq(nxt))
			continue;
		/* nxt.seq is behind @tail, otherwise would've been completed */
		if (off >= nextt->target_seq - tail)
			break;
	}
add:
	list_add(&timeout->list, entry);
	data->timer.function = io_timeout_fn;
	hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
	spin_unlock_irq(&ctx->timeout_lock);
	return IOU_ISSUE_SKIP_COMPLETE;
}
void io_queue_linked_timeout(struct io_kiocb *req)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_ring_ctx *ctx = req->ctx;

	spin_lock_irq(&ctx->timeout_lock);
	/*
	 * If the back reference is NULL, then our linked request finished
	 * before we got a chance to set up the timer
	 */
	if (timeout->head) {
		struct io_timeout_data *data = req->async_data;

		data->timer.function = io_link_timeout_fn;
		hrtimer_start(&data->timer, timespec64_to_ktime(data->ts),
				data->mode);
		list_add_tail(&timeout->list, &ctx->ltimeout_list);
	}
	spin_unlock_irq(&ctx->timeout_lock);
	/* drop submission reference */
	io_put_req(req);
}
static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
			  bool cancel_all)
	__must_hold(&req->ctx->timeout_lock)
{
	struct io_kiocb *req;

	if (task && head->task != task)
		return false;
	if (cancel_all)
		return true;

	io_for_each_link(req, head) {
		if (req->flags & REQ_F_INFLIGHT)
			return true;
	}
	return false;
}
/* Returns true if we found and killed one or more timeouts */
__cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
			     bool cancel_all)
{
	struct io_timeout *timeout, *tmp;
	int canceled = 0;

	/*
	 * completion_lock is needed for io_match_task(). Take it before
	 * timeout_lock to keep the locking order.
	 */
	spin_lock(&ctx->completion_lock);
	spin_lock_irq(&ctx->timeout_lock);
	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
		struct io_kiocb *req = cmd_to_io_kiocb(timeout);

		if (io_match_task(req, tsk, cancel_all) &&
		    io_kill_timeout(req, -ECANCELED))
			canceled++;
	}
	spin_unlock_irq(&ctx->timeout_lock);
	spin_unlock(&ctx->completion_lock);
	return canceled != 0;
}