// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/io_uring.h>

#include <trace/events/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "refs.h"
#include "cancel.h"
#include "timeout.h"
struct io_timeout {
	struct file			*file;
	u32				off;
	u32				target_seq;
	struct list_head		list;
	/* head of the link, used by linked timeouts only */
	struct io_kiocb			*head;
	/* for linked completions */
	struct io_kiocb			*prev;
};

struct io_timeout_rem {
	struct file			*file;
	u64				addr;

	/* timeout update */
	struct timespec64		ts;
	u32				flags;
	bool				ltimeout;
};
static inline bool io_is_timeout_noseq(struct io_kiocb *req)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);

	return !timeout->off;
}
static inline void io_put_req(struct io_kiocb *req)
{
	if (req_ref_put_and_test(req)) {
		io_queue_next(req);
		io_free_req(req);
	}
}
static bool io_kill_timeout(struct io_kiocb *req, int status)
	__must_hold(&req->ctx->completion_lock)
	__must_hold(&req->ctx->timeout_lock)
{
	struct io_timeout_data *io = req->async_data;
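	/*
	 * hrtimer_try_to_cancel() returns -1 only while the timer callback
	 * is currently executing and can't be stopped; 0 (not queued) or
	 * 1 (dequeued) mean this timeout is ours to complete.
	 */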
	if (hrtimer_try_to_cancel(&io->timer) != -1) {
		struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);

		if (status)
			req_set_fail(req);
		atomic_set(&req->ctx->cq_timeouts,
			atomic_read(&req->ctx->cq_timeouts) + 1);
		list_del_init(&timeout->list);
		io_req_tw_post_queue(req, status, 0);
		return true;
	}
	return false;
}
__cold void io_flush_timeouts(struct io_ring_ctx *ctx)
	__must_hold(&ctx->completion_lock)
{
	u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
	struct io_timeout *timeout, *tmp;

	spin_lock_irq(&ctx->timeout_lock);
	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
		struct io_kiocb *req = cmd_to_io_kiocb(timeout);
		u32 events_needed, events_got;

		if (io_is_timeout_noseq(req))
			break;

		/*
		 * Since seq can easily wrap around over time, subtract
		 * the last seq at which timeouts were flushed before comparing.
		 * Assuming not more than 2^31-1 events have happened since,
		 * these subtractions won't have wrapped, so we can check if
		 * target is in [last_seq, current_seq] by comparing the two.
		 */
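		/*
		 * Worked example (made-up numbers): with
		 * cq_last_tm_flush == 0xfffffff0, a timeout whose target_seq
		 * wrapped to 0x10, and seq == 0x20:
		 * events_needed == 0x10 - 0xfffffff0 == 0x20 and
		 * events_got == 0x20 - 0xfffffff0 == 0x30, so
		 * events_got >= events_needed and the timeout is flushed
		 * despite the u32 wrap.
		 */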
		events_needed = timeout->target_seq - ctx->cq_last_tm_flush;
		events_got = seq - ctx->cq_last_tm_flush;
		if (events_got < events_needed)
			break;

		io_kill_timeout(req, 0);
	}
	ctx->cq_last_tm_flush = seq;
	spin_unlock_irq(&ctx->timeout_lock);
}
static void io_req_tw_fail_links(struct io_kiocb *link, bool *locked)
{
	io_tw_lock(link->ctx, locked);
	while (link) {
		struct io_kiocb *nxt = link->link;
		long res = -ECANCELED;

		if (link->flags & REQ_F_FAIL)
			res = link->cqe.res;
		link->link = NULL;
		io_req_set_res(link, res, 0);
		io_req_task_complete(link, locked);
		link = nxt;
	}
}
static void io_fail_links(struct io_kiocb *req)
	__must_hold(&req->ctx->completion_lock)
{
	struct io_kiocb *link = req->link;
	bool ignore_cqes = req->flags & REQ_F_SKIP_LINK_CQES;

	if (!link)
		return;

	while (link) {
		if (ignore_cqes)
			link->flags |= REQ_F_CQE_SKIP;
		else
			link->flags &= ~REQ_F_CQE_SKIP;
		trace_io_uring_fail_link(req, link);
		link = link->link;
	}

	link = req->link;
	link->io_task_work.func = io_req_tw_fail_links;
	io_req_task_work_add(link);
	req->link = NULL;
}
static inline void io_remove_next_linked(struct io_kiocb *req)
{
	struct io_kiocb *nxt = req->link;

	req->link = nxt->link;
	nxt->link = NULL;
}
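/*
 * Disarm whatever hangs off @req's link: a not-yet-started armed link
 * timeout (REQ_F_ARM_LTIMEOUT) is dropped, an already-queued one
 * (REQ_F_LINK_TIMEOUT) is cancelled, and if @req itself failed, its
 * remaining soft links are failed too. Returns true if any completion
 * was posted.
 */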
bool io_disarm_next(struct io_kiocb *req)
	__must_hold(&req->ctx->completion_lock)
{
	struct io_kiocb *link = NULL;
	bool posted = false;

	if (req->flags & REQ_F_ARM_LTIMEOUT) {
		link = req->link;
		req->flags &= ~REQ_F_ARM_LTIMEOUT;
		if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
			io_remove_next_linked(req);
			io_req_tw_post_queue(link, -ECANCELED, 0);
			posted = true;
		}
	} else if (req->flags & REQ_F_LINK_TIMEOUT) {
		struct io_ring_ctx *ctx = req->ctx;

		spin_lock_irq(&ctx->timeout_lock);
		link = io_disarm_linked_timeout(req);
		spin_unlock_irq(&ctx->timeout_lock);
		if (link) {
			posted = true;
			io_req_tw_post_queue(link, -ECANCELED, 0);
		}
	}
	if (unlikely((req->flags & REQ_F_FAIL) &&
		     !(req->flags & REQ_F_HARDLINK))) {
		posted |= (req->link != NULL);
		io_fail_links(req);
	}
	return posted;
}
struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req,
					    struct io_kiocb *link)
	__must_hold(&req->ctx->completion_lock)
	__must_hold(&req->ctx->timeout_lock)
{
	struct io_timeout_data *io = link->async_data;
	struct io_timeout *timeout = io_kiocb_to_cmd(link, struct io_timeout);

	io_remove_next_linked(req);
	timeout->head = NULL;
	if (hrtimer_try_to_cancel(&io->timer) != -1) {
		list_del(&timeout->list);
		return link;
	}

	return NULL;
}
static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
{
	struct io_timeout_data *data = container_of(timer,
						struct io_timeout_data, timer);
	struct io_kiocb *req = data->req;
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_ring_ctx *ctx = req->ctx;
	unsigned long flags;

	spin_lock_irqsave(&ctx->timeout_lock, flags);
	list_del_init(&timeout->list);
	atomic_set(&req->ctx->cq_timeouts,
		atomic_read(&req->ctx->cq_timeouts) + 1);
	spin_unlock_irqrestore(&ctx->timeout_lock, flags);

	if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS))
		req_set_fail(req);

	io_req_set_res(req, -ETIME, 0);
	req->io_task_work.func = io_req_task_complete;
	io_req_task_work_add(req);
	return HRTIMER_NORESTART;
}
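/*
 * Find a timeout matching @cd, cancel its hrtimer, and unlink it from
 * ->timeout_list. Returns the request, ERR_PTR(-ENOENT) if nothing
 * matched, or ERR_PTR(-EALREADY) if the timer callback is already
 * running and the timeout will complete on its own.
 */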
static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
					   struct io_cancel_data *cd)
	__must_hold(&ctx->timeout_lock)
{
	struct io_timeout *timeout;
	struct io_timeout_data *io;
	struct io_kiocb *req = NULL;

	list_for_each_entry(timeout, &ctx->timeout_list, list) {
		struct io_kiocb *tmp = cmd_to_io_kiocb(timeout);

		if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) &&
		    cd->data != tmp->cqe.user_data)
			continue;
		if (cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY)) {
			if (cd->seq == tmp->work.cancel_seq)
				continue;
			tmp->work.cancel_seq = cd->seq;
		}
		req = tmp;
		break;
	}
	if (!req)
		return ERR_PTR(-ENOENT);

	io = req->async_data;
	if (hrtimer_try_to_cancel(&io->timer) == -1)
		return ERR_PTR(-EALREADY);
	timeout = io_kiocb_to_cmd(req, struct io_timeout);
	list_del_init(&timeout->list);
	return req;
}
int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
	__must_hold(&ctx->completion_lock)
{
	struct io_kiocb *req;

	spin_lock_irq(&ctx->timeout_lock);
	req = io_timeout_extract(ctx, cd);
	spin_unlock_irq(&ctx->timeout_lock);

	if (IS_ERR(req))
		return PTR_ERR(req);
	io_req_task_queue_fail(req, -ECANCELED);
	return 0;
}
static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
{
	unsigned issue_flags = *locked ? 0 : IO_URING_F_UNLOCKED;
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_kiocb *prev = timeout->prev;
	int ret = -ENOENT;

	if (prev) {
		if (!(req->task->flags & PF_EXITING)) {
			struct io_cancel_data cd = {
				.ctx		= req->ctx,
				.data		= prev->cqe.user_data,
			};

			ret = io_try_cancel(req->task->io_uring, &cd, issue_flags);
		}
		io_req_set_res(req, ret ?: -ETIME, 0);
		io_req_complete_post(req);
		io_put_req(prev);
	} else {
		io_req_set_res(req, -ETIME, 0);
		io_req_complete_post(req);
	}
}
static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
{
	struct io_timeout_data *data = container_of(timer,
						struct io_timeout_data, timer);
	struct io_kiocb *prev, *req = data->req;
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_ring_ctx *ctx = req->ctx;
	unsigned long flags;

	spin_lock_irqsave(&ctx->timeout_lock, flags);
	prev = timeout->head;
	timeout->head = NULL;

	/*
	 * We don't expect the list to be empty; that will only happen if we
	 * race with the completion of the linked work.
	 */
	if (prev) {
		io_remove_next_linked(prev);
		if (!req_ref_inc_not_zero(prev))
			prev = NULL;
	}
	list_del(&timeout->list);
	timeout->prev = prev;
	spin_unlock_irqrestore(&ctx->timeout_lock, flags);

	req->io_task_work.func = io_req_task_link_timeout;
	io_req_task_work_add(req);
	return HRTIMER_NORESTART;
}
static clockid_t io_timeout_get_clock(struct io_timeout_data *data)
{
	switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) {
	case IORING_TIMEOUT_BOOTTIME:
		return CLOCK_BOOTTIME;
	case IORING_TIMEOUT_REALTIME:
		return CLOCK_REALTIME;
	default:
		/* can't happen, vetted at prep time */
		WARN_ON_ONCE(1);
		fallthrough;
	case 0:
		return CLOCK_MONOTONIC;
	}
}
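/*
 * Illustrative userspace sketch (liburing, hypothetical values; `ring' is
 * assumed to be an initialized struct io_uring), not part of this file:
 * arming a one-second timeout measured against CLOCK_BOOTTIME.
 *
 *	struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_timeout(sqe, &ts, 0, IORING_TIMEOUT_BOOTTIME);
 */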
static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
				    struct timespec64 *ts, enum hrtimer_mode mode)
	__must_hold(&ctx->timeout_lock)
{
	struct io_timeout_data *io;
	struct io_timeout *timeout;
	struct io_kiocb *req = NULL;

	list_for_each_entry(timeout, &ctx->ltimeout_list, list) {
		struct io_kiocb *tmp = cmd_to_io_kiocb(timeout);

		if (user_data == tmp->cqe.user_data) {
			req = tmp;
			break;
		}
	}
	if (!req)
		return -ENOENT;

	io = req->async_data;
	if (hrtimer_try_to_cancel(&io->timer) == -1)
		return -EALREADY;
	hrtimer_init(&io->timer, io_timeout_get_clock(io), mode);
	io->timer.function = io_link_timeout_fn;
	hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode);
	return 0;
}
static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
			     struct timespec64 *ts, enum hrtimer_mode mode)
	__must_hold(&ctx->timeout_lock)
{
	struct io_cancel_data cd = { .data = user_data, };
	struct io_kiocb *req = io_timeout_extract(ctx, &cd);
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_timeout_data *data;

	if (IS_ERR(req))
		return PTR_ERR(req);

	timeout->off = 0; /* noseq */
	data = req->async_data;
	list_add_tail(&timeout->list, &ctx->timeout_list);
	hrtimer_init(&data->timer, io_timeout_get_clock(data), mode);
	data->timer.function = io_timeout_fn;
	hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode);
	return 0;
}
int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_timeout_rem *tr = io_kiocb_to_cmd(req, struct io_timeout_rem);

	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
		return -EINVAL;
	if (sqe->buf_index || sqe->len || sqe->splice_fd_in)
		return -EINVAL;

	tr->ltimeout = false;
	tr->addr = READ_ONCE(sqe->addr);
	tr->flags = READ_ONCE(sqe->timeout_flags);
	if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) {
		if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
			return -EINVAL;
		if (tr->flags & IORING_LINK_TIMEOUT_UPDATE)
			tr->ltimeout = true;
		if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS))
			return -EINVAL;
		if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2)))
			return -EFAULT;
		if (tr->ts.tv_sec < 0 || tr->ts.tv_nsec < 0)
			return -EINVAL;
	} else if (tr->flags) {
		/* timeout removal doesn't support flags */
		return -EINVAL;
	}

	return 0;
}
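/*
 * Illustrative userspace sketch (liburing; the user_data value 0xcafe and
 * `ring' are hypothetical), not part of this file: rearming an existing
 * timeout identified by the user_data of its original SQE.
 *
 *	struct __kernel_timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_timeout_update(sqe, &ts, 0xcafe, 0);
 */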
static inline enum hrtimer_mode io_translate_timeout_mode(unsigned int flags)
{
	return (flags & IORING_TIMEOUT_ABS) ? HRTIMER_MODE_ABS
					    : HRTIMER_MODE_REL;
}
/*
 * Remove or update an existing timeout command
 */
int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_timeout_rem *tr = io_kiocb_to_cmd(req, struct io_timeout_rem);
	struct io_ring_ctx *ctx = req->ctx;
	int ret;

	if (!(tr->flags & IORING_TIMEOUT_UPDATE)) {
		struct io_cancel_data cd = { .data = tr->addr, };

		spin_lock(&ctx->completion_lock);
		ret = io_timeout_cancel(ctx, &cd);
		spin_unlock(&ctx->completion_lock);
	} else {
		enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags);

		spin_lock_irq(&ctx->timeout_lock);
		if (tr->ltimeout)
			ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode);
		else
			ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode);
		spin_unlock_irq(&ctx->timeout_lock);
	}

	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
static int __io_timeout_prep(struct io_kiocb *req,
			     const struct io_uring_sqe *sqe,
			     bool is_timeout_link)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_timeout_data *data;
	unsigned flags;
	u32 off = READ_ONCE(sqe->off);

	if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in)
		return -EINVAL;
	if (off && is_timeout_link)
		return -EINVAL;
	flags = READ_ONCE(sqe->timeout_flags);
	if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK |
		      IORING_TIMEOUT_ETIME_SUCCESS))
		return -EINVAL;
	/* more than one clock specified is invalid, obviously */
	if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
		return -EINVAL;

	INIT_LIST_HEAD(&timeout->list);
	timeout->off = off;
	if (unlikely(off && !req->ctx->off_timeout_used))
		req->ctx->off_timeout_used = true;

	if (WARN_ON_ONCE(req_has_async_data(req)))
		return -EFAULT;
	if (io_alloc_async_data(req))
		return -ENOMEM;

	data = req->async_data;
	data->req = req;
	data->flags = flags;

	if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr)))
		return -EFAULT;

	if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0)
		return -EINVAL;

	INIT_LIST_HEAD(&timeout->list);
	data->mode = io_translate_timeout_mode(flags);
	hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode);

	if (is_timeout_link) {
		struct io_submit_link *link = &req->ctx->submit_state.link;

		if (!link->head)
			return -EINVAL;
		if (link->last->opcode == IORING_OP_LINK_TIMEOUT)
			return -EINVAL;
		timeout->head = link->last;
		link->last->flags |= REQ_F_ARM_LTIMEOUT;
	}
	return 0;
}
int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	return __io_timeout_prep(req, sqe, false);
}

int io_link_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	return __io_timeout_prep(req, sqe, true);
}
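/*
 * Illustrative userspace sketch (liburing; `ring', `fd', and `buf' are
 * hypothetical), not part of this file: a read that is cancelled if it
 * takes longer than 100ms, by linking a timeout behind it.
 *
 *	struct __kernel_timespec ts = { .tv_nsec = 100 * 1000 * 1000 };
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_read(sqe, fd, buf, sizeof(buf), 0);
 *	io_uring_sqe_set_flags(sqe, IOSQE_IO_LINK);
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_link_timeout(sqe, &ts, 0);
 */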
int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_timeout_data *data = req->async_data;
	struct list_head *entry;
	u32 tail, off = timeout->off;

	spin_lock_irq(&ctx->timeout_lock);

	/*
	 * sqe->off holds how many events need to occur for this
	 * timeout event to be satisfied. If it isn't set, then this is
	 * a pure timeout request, sequence isn't used.
	 */
	if (io_is_timeout_noseq(req)) {
		entry = ctx->timeout_list.prev;
		goto add;
	}

	tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
	timeout->target_seq = tail + off;

	/* Update the last seq here in case io_flush_timeouts() hasn't.
	 * This is safe because ->completion_lock is held, and submissions
	 * and completions are never mixed in the same ->completion_lock section.
	 */
	ctx->cq_last_tm_flush = tail;

	/*
	 * Insertion sort, ensuring the first entry in the list is always
	 * the one we need first.
	 */
	list_for_each_prev(entry, &ctx->timeout_list) {
		struct io_timeout *nextt = list_entry(entry, struct io_timeout, list);
		struct io_kiocb *nxt = cmd_to_io_kiocb(nextt);

		if (io_is_timeout_noseq(nxt))
			continue;
		/* nxt.seq is behind @tail, otherwise would've been completed */
		if (off >= nextt->target_seq - tail)
			break;
	}
add:
	list_add(&timeout->list, entry);
	data->timer.function = io_timeout_fn;
	hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
	spin_unlock_irq(&ctx->timeout_lock);
	return IOU_ISSUE_SKIP_COMPLETE;
}
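/*
 * Illustrative userspace sketch (liburing; the count of 8 and `ring' are
 * hypothetical), not part of this file: a sequenced timeout that completes
 * once 8 CQEs have been posted, or after one second, whichever comes first.
 *
 *	struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_timeout(sqe, &ts, 8, 0);
 */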
void io_queue_linked_timeout(struct io_kiocb *req)
{
	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
	struct io_ring_ctx *ctx = req->ctx;

	spin_lock_irq(&ctx->timeout_lock);
	/*
	 * If the back reference is NULL, then our linked request finished
	 * before we got a chance to set up the timer
	 */
	if (timeout->head) {
		struct io_timeout_data *data = req->async_data;

		data->timer.function = io_link_timeout_fn;
		hrtimer_start(&data->timer, timespec64_to_ktime(data->ts),
				data->mode);
		list_add_tail(&timeout->list, &ctx->ltimeout_list);
	}
	spin_unlock_irq(&ctx->timeout_lock);
	/* drop submission reference */
	io_put_req(req);
}
static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
			  bool cancel_all)
	__must_hold(&head->ctx->timeout_lock)
{
	struct io_kiocb *req;

	if (task && head->task != task)
		return false;
	if (cancel_all)
		return true;

	io_for_each_link(req, head) {
		if (req->flags & REQ_F_INFLIGHT)
			return true;
	}
	return false;
}
/* Returns true if we found and killed one or more timeouts */
__cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
			     bool cancel_all)
{
	struct io_timeout *timeout, *tmp;
	int canceled = 0;

	io_cq_lock(ctx);
	spin_lock_irq(&ctx->timeout_lock);
	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
		struct io_kiocb *req = cmd_to_io_kiocb(timeout);

		if (io_match_task(req, tsk, cancel_all) &&
		    io_kill_timeout(req, -ECANCELED))
			canceled++;
	}
	spin_unlock_irq(&ctx->timeout_lock);
	io_cq_unlock_post(ctx);
	return canceled != 0;
}