// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/hashtable.h>
#include <linux/io_uring.h>

#include <trace/events/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "refs.h"
#include "opdef.h"
#include "kbuf.h"
#include "poll.h"
#include "cancel.h"
struct io_poll_update {
	struct file			*file;
	u64				old_user_data;
	u64				new_user_data;
	__poll_t			events;
	bool				update_events;
	bool				update_user_data;
};
struct io_poll_table {
	struct poll_table_struct pt;
	struct io_kiocb *req;
	int nr_entries;
	int error;
	bool owning;
	/* output value, set only if arm poll returns >0 */
	__poll_t result_mask;
};
#define IO_POLL_CANCEL_FLAG	BIT(31)
#define IO_POLL_RETRY_FLAG	BIT(30)
#define IO_POLL_REF_MASK	GENMASK(29, 0)
/*
 * We usually have 1-2 refs taken, 128 is more than enough and we want to
 * maximise the margin between this amount and the moment when it overflows.
 */
#define IO_POLL_REF_BIAS 128
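
/*
 * For orientation, a sketch of ->poll_refs derived from the definitions
 * above (not a canonical diagram): one atomic_t packs three things:
 *
 *	bit  31		IO_POLL_CANCEL_FLAG
 *	bit  30		IO_POLL_RETRY_FLAG
 *	bits 0-29	reference count (IO_POLL_REF_MASK)
 */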
#define IO_WQE_F_DOUBLE		1
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
			void *key);
static inline struct io_kiocb *wqe_to_req(struct wait_queue_entry *wqe)
{
	unsigned long priv = (unsigned long)wqe->private;

	return (struct io_kiocb *)(priv & ~IO_WQE_F_DOUBLE);
}
static inline bool wqe_is_double(struct wait_queue_entry *wqe)
{
	unsigned long priv = (unsigned long)wqe->private;

	return priv & IO_WQE_F_DOUBLE;
}
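
/*
 * wqe->private is a tagged pointer: the io_kiocb is stored with
 * IO_WQE_F_DOUBLE or'ed into bit 0 for the second (double) poll entry.
 * Illustrative only, mirroring __io_queue_proc() below:
 *
 *	wqe->private = (void *)((unsigned long)req | IO_WQE_F_DOUBLE);
 *	req = wqe_to_req(wqe);		// masks the tag back off
 *	is_double = wqe_is_double(wqe);	// tests bit 0
 */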
static bool io_poll_get_ownership_slowpath(struct io_kiocb *req)
{
	int v;

	/*
	 * poll_refs are already elevated and we don't have much hope for
	 * grabbing the ownership. Instead of incrementing set a retry flag
	 * to notify the loop that there might have been some change.
	 */
	v = atomic_fetch_or(IO_POLL_RETRY_FLAG, &req->poll_refs);
	if (v & IO_POLL_REF_MASK)
		return false;
	return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
}
/*
 * If the refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We
 * can bump it and acquire ownership. It's disallowed to modify requests while
 * not owning it, which prevents races between enqueueing task_work's and
 * between arming poll and wakeups.
 */
static inline bool io_poll_get_ownership(struct io_kiocb *req)
{
	if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS))
		return io_poll_get_ownership_slowpath(req);
	return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
}
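
/*
 * A rough sketch of how a wakeup path uses the helper above (illustrative,
 * mirroring io_poll_wake() further down rather than adding anything new):
 *
 *	if (io_poll_get_ownership(req)) {
 *		// we won the race: queue task_work, which drops all
 *		// refs in io_poll_check_events()
 *		__io_poll_execute(req, mask);
 *	}
 *	// else: whoever holds ownership observes the bumped ref and
 *	// loops again in io_poll_check_events(), so nothing is lost
 */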
static void io_poll_mark_cancelled(struct io_kiocb *req)
{
	atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs);
}
static struct io_poll *io_poll_get_double(struct io_kiocb *req)
{
	/* pure poll stashes this in ->async_data, poll driven retry elsewhere */
	if (req->opcode == IORING_OP_POLL_ADD)
		return req->async_data;
	return req->apoll->double_poll;
}
static struct io_poll *io_poll_get_single(struct io_kiocb *req)
{
	if (req->opcode == IORING_OP_POLL_ADD)
		return io_kiocb_to_cmd(req, struct io_poll);
	return &req->apoll->poll;
}
static void io_poll_req_insert(struct io_kiocb *req)
{
	struct io_hash_table *table = &req->ctx->cancel_table;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);
	struct io_hash_bucket *hb = &table->hbs[index];

	spin_lock(&hb->lock);
	hlist_add_head(&req->hash_node, &hb->list);
	spin_unlock(&hb->lock);
}
static void io_poll_req_delete(struct io_kiocb *req, struct io_ring_ctx *ctx)
{
	struct io_hash_table *table = &req->ctx->cancel_table;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);
	spinlock_t *lock = &table->hbs[index].lock;

	spin_lock(lock);
	hash_del(&req->hash_node);
	spin_unlock(lock);
}
static void io_poll_req_insert_locked(struct io_kiocb *req)
{
	struct io_hash_table *table = &req->ctx->cancel_table_locked;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);

	lockdep_assert_held(&req->ctx->uring_lock);

	hlist_add_head(&req->hash_node, &table->hbs[index].list);
}
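
/*
 * Note on the two tables (a summary of the helpers above, not new policy):
 * ->cancel_table buckets are protected by per-bucket spinlocks and are
 * usable from any context, while ->cancel_table_locked piggybacks on
 * ->uring_lock, which is cheaper when that mutex is held anyway.
 * REQ_F_HASH_LOCKED records which table a request went into.
 */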
static void io_poll_tw_hash_eject(struct io_kiocb *req, struct io_tw_state *ts)
{
	struct io_ring_ctx *ctx = req->ctx;

	if (req->flags & REQ_F_HASH_LOCKED) {
		/*
		 * ->cancel_table_locked is protected by ->uring_lock in
		 * contrast to per bucket spinlocks. Likely, tctx_task_work()
		 * already grabbed the mutex for us, but there is a chance it
		 * didn't.
		 */
		io_tw_lock(ctx, ts);
		hash_del(&req->hash_node);
		req->flags &= ~REQ_F_HASH_LOCKED;
	} else {
		io_poll_req_delete(req, ctx);
	}
}
static void io_init_poll_iocb(struct io_poll *poll, __poll_t events)
{
	poll->head = NULL;
#define IO_POLL_UNMASK	(EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
	/* mask in events that we always want/need */
	poll->events = events | IO_POLL_UNMASK;
	INIT_LIST_HEAD(&poll->wait.entry);
	init_waitqueue_func_entry(&poll->wait, io_poll_wake);
}
static inline void io_poll_remove_entry(struct io_poll *poll)
{
	struct wait_queue_head *head = smp_load_acquire(&poll->head);

	if (head) {
		spin_lock_irq(&head->lock);
		list_del_init(&poll->wait.entry);
		poll->head = NULL;
		spin_unlock_irq(&head->lock);
	}
}
static void io_poll_remove_entries(struct io_kiocb *req)
{
	/*
	 * Nothing to do if neither of those flags are set. Avoid dipping
	 * into the poll/apoll/double cachelines if we can.
	 */
	if (!(req->flags & (REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL)))
		return;

	/*
	 * While we hold the waitqueue lock and the waitqueue is nonempty,
	 * wake_up_pollfree() will wait for us. However, taking the waitqueue
	 * lock in the first place can race with the waitqueue being freed.
	 *
	 * We solve this as eventpoll does: by taking advantage of the fact that
	 * all users of wake_up_pollfree() will RCU-delay the actual free. If
	 * we enter rcu_read_lock() and see that the pointer to the queue is
	 * non-NULL, we can then lock it without the memory being freed out from
	 * under us.
	 *
	 * Keep holding rcu_read_lock() as long as we hold the queue lock, in
	 * case the caller deletes the entry from the queue, leaving it empty.
	 * In that case, only RCU prevents the queue memory from being freed.
	 */
	rcu_read_lock();
	if (req->flags & REQ_F_SINGLE_POLL)
		io_poll_remove_entry(io_poll_get_single(req));
	if (req->flags & REQ_F_DOUBLE_POLL)
		io_poll_remove_entry(io_poll_get_double(req));
	rcu_read_unlock();
}
enum {
	IOU_POLL_DONE = 0,
	IOU_POLL_NO_ACTION = 1,
	IOU_POLL_REMOVE_POLL_USE_RES = 2,
	IOU_POLL_REISSUE = 3,
};
/*
 * All poll tw should go through this. Checks for poll events, manages
 * references, does rewait, etc.
 *
 * Returns a negative error on failure. IOU_POLL_NO_ACTION when no action
 * is required, which happens on a spurious wakeup or when a multishot CQE
 * has already been served. IOU_POLL_DONE when it's done with the request,
 * then the mask is stored in req->cqe.res. IOU_POLL_REMOVE_POLL_USE_RES
 * indicates to remove the multishot poll and that the result is stored in
 * req->cqe.
 */
static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
{
	int v;

	/* req->task == current here, checking PF_EXITING is safe */
	if (unlikely(req->task->flags & PF_EXITING))
		return -ECANCELED;

	do {
		v = atomic_read(&req->poll_refs);

		if (unlikely(v != 1)) {
			/* tw should be the owner and so have some refs */
			if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK)))
				return IOU_POLL_NO_ACTION;
			if (v & IO_POLL_CANCEL_FLAG)
				return -ECANCELED;
			/*
			 * cqe.res contains only events of the first wake up
			 * and all others are to be lost. Redo vfs_poll() to get
			 * up to date state.
			 */
			if ((v & IO_POLL_REF_MASK) != 1)
				req->cqe.res = 0;

			if (v & IO_POLL_RETRY_FLAG) {
				req->cqe.res = 0;
				/*
				 * We won't find new events that came in between
				 * vfs_poll and the ref put unless we clear the
				 * flag in advance.
				 */
				atomic_andnot(IO_POLL_RETRY_FLAG, &req->poll_refs);
				v &= ~IO_POLL_RETRY_FLAG;
			}
		}

		/* the mask was stashed in __io_poll_execute */
		if (!req->cqe.res) {
			struct poll_table_struct pt = { ._key = req->apoll_events };
			req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
			/*
			 * We got woken with a mask, but someone else got to
			 * it first. The above vfs_poll() doesn't add us back
			 * to the waitqueue, so if we get nothing back, we
			 * should be safe and attempt a reissue.
			 */
			if (unlikely(!req->cqe.res)) {
				/* Multishot armed need not reissue */
				if (!(req->apoll_events & EPOLLONESHOT))
					continue;
				return IOU_POLL_REISSUE;
			}
		}
		if (req->apoll_events & EPOLLONESHOT)
			return IOU_POLL_DONE;

		/* multishot, just fill a CQE and proceed */
		if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
			__poll_t mask = mangle_poll(req->cqe.res &
						    req->apoll_events);

			if (!io_fill_cqe_req_aux(req, ts->locked, mask,
						 IORING_CQE_F_MORE)) {
				io_req_set_res(req, mask, 0);
				return IOU_POLL_REMOVE_POLL_USE_RES;
			}
		} else {
			int ret = io_poll_issue(req, ts);
			if (ret == IOU_STOP_MULTISHOT)
				return IOU_POLL_REMOVE_POLL_USE_RES;
			if (ret < 0)
				return ret;
		}

		/* force the next iteration to vfs_poll() */
		req->cqe.res = 0;

		/*
		 * Release all references, retry if someone tried to restart
		 * task_work while we were executing it.
		 */
	} while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs) &
					IO_POLL_REF_MASK);

	return IOU_POLL_NO_ACTION;
}
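
/*
 * Worked example of the drop loop above (illustrative numbers): tw runs
 * with poll_refs == 3 because two extra wakeups fired while it was queued.
 * v is read as 3, events are processed once, then
 * atomic_sub_return(3 & IO_POLL_REF_MASK) brings poll_refs to 0 and the
 * loop exits. If another wakeup sneaks in between the read and the sub,
 * the result is non-zero and we loop again, so no wakeup is ever lost.
 */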
void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
{
	int ret;

	ret = io_poll_check_events(req, ts);
	if (ret == IOU_POLL_NO_ACTION)
		return;
	io_poll_remove_entries(req);
	io_poll_tw_hash_eject(req, ts);

	if (req->opcode == IORING_OP_POLL_ADD) {
		if (ret == IOU_POLL_DONE) {
			struct io_poll *poll;

			poll = io_kiocb_to_cmd(req, struct io_poll);
			req->cqe.res = mangle_poll(req->cqe.res & poll->events);
		} else if (ret == IOU_POLL_REISSUE) {
			io_req_task_submit(req, ts);
			return;
		} else if (ret != IOU_POLL_REMOVE_POLL_USE_RES) {
			req->cqe.res = ret;
			req_set_fail(req);
		}

		io_req_set_res(req, req->cqe.res, 0);
		io_req_task_complete(req, ts);
	} else {
		io_tw_lock(req->ctx, ts);

		if (ret == IOU_POLL_REMOVE_POLL_USE_RES)
			io_req_task_complete(req, ts);
		else if (ret == IOU_POLL_DONE || ret == IOU_POLL_REISSUE)
			io_req_task_submit(req, ts);
		else
			io_req_defer_failed(req, ret);
	}
}
static void __io_poll_execute(struct io_kiocb *req, int mask)
{
	io_req_set_res(req, mask, 0);
	req->io_task_work.func = io_poll_task_func;

	trace_io_uring_task_add(req, mask);
	__io_req_task_work_add(req, IOU_F_TWQ_LAZY_WAKE);
}
static inline void io_poll_execute(struct io_kiocb *req, int res)
{
	if (io_poll_get_ownership(req))
		__io_poll_execute(req, res);
}
static void io_poll_cancel_req(struct io_kiocb *req)
{
	io_poll_mark_cancelled(req);
	/* kick tw, which should complete the request */
	io_poll_execute(req, 0);
}
#define IO_ASYNC_POLL_COMMON	(EPOLLONESHOT | EPOLLPRI)
static __cold int io_pollfree_wake(struct io_kiocb *req, struct io_poll *poll)
{
	io_poll_mark_cancelled(req);
	/* we have to kick tw in case it's not already */
	io_poll_execute(req, 0);

	/*
	 * If the waitqueue is being freed early but someone already
	 * holds ownership over it, we have to tear down the request as
	 * best we can. That means immediately removing the request from
	 * its waitqueue and preventing all further accesses to the
	 * waitqueue via the request.
	 */
	list_del_init(&poll->wait.entry);

	/*
	 * Careful: this *must* be the last step, since as soon
	 * as req->head is NULL'ed out, the request can be
	 * completed and freed, since aio_poll_complete_work()
	 * will no longer need to take the waitqueue lock.
	 */
	smp_store_release(&poll->head, NULL);
	return 1;
}
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
			void *key)
{
	struct io_kiocb *req = wqe_to_req(wait);
	struct io_poll *poll = container_of(wait, struct io_poll, wait);
	__poll_t mask = key_to_poll(key);

	if (unlikely(mask & POLLFREE))
		return io_pollfree_wake(req, poll);

	/* for instances that support it check for an event match first */
	if (mask && !(mask & (poll->events & ~IO_ASYNC_POLL_COMMON)))
		return 0;

	if (io_poll_get_ownership(req)) {
		/*
		 * If we trigger a multishot poll off our own wakeup path,
		 * disable multishot as there is a circular dependency between
		 * CQ posting and triggering the event.
		 */
		if (mask & EPOLL_URING_WAKE)
			poll->events |= EPOLLONESHOT;

		/* optional, saves extra locking for removal in tw handler */
		if (mask && poll->events & EPOLLONESHOT) {
			list_del_init(&poll->wait.entry);
			poll->head = NULL;
			if (wqe_is_double(wait))
				req->flags &= ~REQ_F_DOUBLE_POLL;
			else
				req->flags &= ~REQ_F_SINGLE_POLL;
		}
		__io_poll_execute(req, mask);
	}
	return 1;
}
/* fails only when polling is already completing by the first entry */
static bool io_poll_double_prepare(struct io_kiocb *req)
{
	struct wait_queue_head *head;
	struct io_poll *poll = io_poll_get_single(req);

	/* head is RCU protected, see io_poll_remove_entries() comments */
	rcu_read_lock();
	head = smp_load_acquire(&poll->head);
	/*
	 * poll arm might not hold ownership and so race for req->flags with
	 * io_poll_wake(). There is only one poll entry queued, serialise with
	 * it by taking its head lock. As we're still arming, the tw handler
	 * is not going to run yet, so there are no races with it.
	 */
	if (head) {
		spin_lock_irq(&head->lock);
		req->flags |= REQ_F_DOUBLE_POLL;
		if (req->opcode == IORING_OP_POLL_ADD)
			req->flags |= REQ_F_ASYNC_DATA;
		spin_unlock_irq(&head->lock);
	}
	rcu_read_unlock();
	return !!head;
}
static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
			    struct wait_queue_head *head,
			    struct io_poll **poll_ptr)
{
	struct io_kiocb *req = pt->req;
	unsigned long wqe_private = (unsigned long) req;

	/*
	 * The file being polled uses multiple waitqueues for poll handling
	 * (e.g. one for read, one for write). Setup a separate io_poll
	 * if this happens.
	 */
	if (unlikely(pt->nr_entries)) {
		struct io_poll *first = poll;

		/* double add on the same waitqueue head, ignore */
		if (first->head == head)
			return;
		/* already have a 2nd entry, fail a third attempt */
		if (*poll_ptr) {
			if ((*poll_ptr)->head == head)
				return;
			pt->error = -EINVAL;
			return;
		}

		poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
		if (!poll) {
			pt->error = -ENOMEM;
			return;
		}

		/* mark as double wq entry */
		wqe_private |= IO_WQE_F_DOUBLE;
		io_init_poll_iocb(poll, first->events);
		if (!io_poll_double_prepare(req)) {
			/* the request is completing, just back off */
			kfree(poll);
			return;
		}
		*poll_ptr = poll;
	} else {
		/* fine to modify, there is no poll queued to race with us */
		req->flags |= REQ_F_SINGLE_POLL;
	}

	pt->nr_entries++;
	poll->head = head;
	poll->wait.private = (void *) wqe_private;

	if (poll->events & EPOLLEXCLUSIVE)
		add_wait_queue_exclusive(head, &poll->wait);
	else
		add_wait_queue(head, &poll->wait);
}
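
/*
 * Example of when the double entry triggers (illustrative, not an
 * exhaustive list): a file whose ->poll() registers two separate
 * waitqueues, say one for readers and one for writers, calls this proc
 * twice. The first call claims the "single" io_poll; the second
 * allocates the extra io_poll stashed via *poll_ptr.
 */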
static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
			       struct poll_table_struct *p)
{
	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
	struct io_poll *poll = io_kiocb_to_cmd(pt->req, struct io_poll);

	__io_queue_proc(poll, pt, head,
			(struct io_poll **) &pt->req->async_data);
}
static bool io_poll_can_finish_inline(struct io_kiocb *req,
				      struct io_poll_table *pt)
{
	return pt->owning || io_poll_get_ownership(req);
}
static void io_poll_add_hash(struct io_kiocb *req)
{
	if (req->flags & REQ_F_HASH_LOCKED)
		io_poll_req_insert_locked(req);
	else
		io_poll_req_insert(req);
}
/*
 * Returns 0 when it's handed over for polling. The caller owns the request if
 * it returns non-zero, but otherwise should not touch it. Negative values
 * contain an error code. When the result is >0, the polling has completed
 * inline and ipt.result_mask is set to the mask.
 */
static int __io_arm_poll_handler(struct io_kiocb *req,
				 struct io_poll *poll,
				 struct io_poll_table *ipt, __poll_t mask,
				 unsigned issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;

	INIT_HLIST_NODE(&req->hash_node);
	req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
	io_init_poll_iocb(poll, mask);
	poll->file = req->file;
	req->apoll_events = poll->events;

	ipt->pt._key = mask;
	ipt->req = req;
	ipt->error = 0;
	ipt->nr_entries = 0;

	/*
	 * Polling is either completed here or via task_work, so if we're in the
	 * task context we're naturally serialised with tw by merit of running
	 * the same task. When it's io-wq, take the ownership to prevent tw
	 * from running. However, when we're in the task context, skip taking
	 * it as an optimisation.
	 *
	 * Note: even though the request won't be completed/freed, without
	 * ownership we still can race with io_poll_wake().
	 * io_poll_can_finish_inline() tries to deal with that.
	 */
	ipt->owning = issue_flags & IO_URING_F_UNLOCKED;
	atomic_set(&req->poll_refs, (int)ipt->owning);

	/* io-wq doesn't hold uring_lock */
	if (issue_flags & IO_URING_F_UNLOCKED)
		req->flags &= ~REQ_F_HASH_LOCKED;

	mask = vfs_poll(req->file, &ipt->pt) & poll->events;

	if (unlikely(ipt->error || !ipt->nr_entries)) {
		io_poll_remove_entries(req);

		if (!io_poll_can_finish_inline(req, ipt)) {
			io_poll_mark_cancelled(req);
			return 0;
		} else if (mask && (poll->events & EPOLLET)) {
			ipt->result_mask = mask;
			return 1;
		}
		return ipt->error ?: -EINVAL;
	}

	if (mask &&
	   ((poll->events & (EPOLLET|EPOLLONESHOT)) == (EPOLLET|EPOLLONESHOT))) {
		if (!io_poll_can_finish_inline(req, ipt)) {
			io_poll_add_hash(req);
			return 0;
		}
		io_poll_remove_entries(req);
		ipt->result_mask = mask;
		/* no one else has access to the req, forget about the ref */
		return 1;
	}

	io_poll_add_hash(req);

	if (mask && (poll->events & EPOLLET) &&
	    io_poll_can_finish_inline(req, ipt)) {
		__io_poll_execute(req, mask);
		return 0;
	}

	if (ipt->owning) {
		/*
		 * Try to release ownership. If we see a change of state, e.g.
		 * poll was woken up, queue up a tw, it'll deal with it.
		 */
		if (atomic_cmpxchg(&req->poll_refs, 1, 0) != 1)
			__io_poll_execute(req, 0);
	}
	return 0;
}
static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
				struct poll_table_struct *p)
{
	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
	struct async_poll *apoll = pt->req->apoll;

	__io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
}
/*
 * We can't reliably detect loops in repeated poll triggers and issue
 * subsequently failing. But rather than fail these immediately, allow a
 * certain amount of retries before we give up. Given that this condition
 * should _rarely_ trigger even once, we should be fine with a larger value.
 */
#define APOLL_MAX_RETRY		128
static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req,
					     unsigned issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_cache_entry *entry;
	struct async_poll *apoll;

	if (req->flags & REQ_F_POLLED) {
		apoll = req->apoll;
		kfree(apoll->double_poll);
	} else if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		entry = io_alloc_cache_get(&ctx->apoll_cache);
		if (entry == NULL)
			goto alloc_apoll;
		apoll = container_of(entry, struct async_poll, cache);
		apoll->poll.retries = APOLL_MAX_RETRY;
	} else {
alloc_apoll:
		apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
		if (unlikely(!apoll))
			return NULL;
		apoll->poll.retries = APOLL_MAX_RETRY;
	}
	apoll->double_poll = NULL;
	req->apoll = apoll;
	if (unlikely(!--apoll->poll.retries))
		return NULL;
	return apoll;
}
int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
{
	const struct io_issue_def *def = &io_issue_defs[req->opcode];
	struct async_poll *apoll;
	struct io_poll_table ipt;
	__poll_t mask = POLLPRI | POLLERR | EPOLLET;
	int ret;

	/*
	 * apoll requests already grab the mutex to complete in the tw handler,
	 * so removal from the mutex-backed hash is free, use it by default.
	 */
	req->flags |= REQ_F_HASH_LOCKED;

	if (!def->pollin && !def->pollout)
		return IO_APOLL_ABORTED;
	if (!file_can_poll(req->file))
		return IO_APOLL_ABORTED;
	if (!(req->flags & REQ_F_APOLL_MULTISHOT))
		mask |= EPOLLONESHOT;

	if (def->pollin) {
		mask |= EPOLLIN | EPOLLRDNORM;

		/* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
		if (req->flags & REQ_F_CLEAR_POLLIN)
			mask &= ~EPOLLIN;
	} else {
		mask |= EPOLLOUT | EPOLLWRNORM;
	}
	if (def->poll_exclusive)
		mask |= EPOLLEXCLUSIVE;

	apoll = io_req_alloc_apoll(req, issue_flags);
	if (!apoll)
		return IO_APOLL_ABORTED;
	req->flags &= ~(REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL);
	req->flags |= REQ_F_POLLED;
	ipt.pt._qproc = io_async_queue_proc;

	io_kbuf_recycle(req, issue_flags);

	ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, issue_flags);
	if (ret)
		return ret > 0 ? IO_APOLL_READY : IO_APOLL_ABORTED;
	trace_io_uring_poll_arm(req, mask, apoll->poll.events);
	return IO_APOLL_OK;
}
static __cold bool io_poll_remove_all_table(struct task_struct *tsk,
					    struct io_hash_table *table,
					    bool cancel_all)
{
	unsigned nr_buckets = 1U << table->hash_bits;
	struct hlist_node *tmp;
	struct io_kiocb *req;
	bool found = false;
	int i;

	for (i = 0; i < nr_buckets; i++) {
		struct io_hash_bucket *hb = &table->hbs[i];

		spin_lock(&hb->lock);
		hlist_for_each_entry_safe(req, tmp, &hb->list, hash_node) {
			if (io_match_task_safe(req, tsk, cancel_all)) {
				hlist_del_init(&req->hash_node);
				io_poll_cancel_req(req);
				found = true;
			}
		}
		spin_unlock(&hb->lock);
	}
	return found;
}
/*
 * Returns true if we found and killed one or more poll requests
 */
__cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
			       bool cancel_all)
	__must_hold(&ctx->uring_lock)
{
	bool ret;

	ret = io_poll_remove_all_table(tsk, &ctx->cancel_table, cancel_all);
	ret |= io_poll_remove_all_table(tsk, &ctx->cancel_table_locked, cancel_all);
	return ret;
}
static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only,
				     struct io_cancel_data *cd,
				     struct io_hash_table *table,
				     struct io_hash_bucket **out_bucket)
{
	struct io_kiocb *req;
	u32 index = hash_long(cd->data, table->hash_bits);
	struct io_hash_bucket *hb = &table->hbs[index];

	*out_bucket = NULL;

	spin_lock(&hb->lock);
	hlist_for_each_entry(req, &hb->list, hash_node) {
		if (cd->data != req->cqe.user_data)
			continue;
		if (poll_only && req->opcode != IORING_OP_POLL_ADD)
			continue;
		if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
			if (cd->seq == req->work.cancel_seq)
				continue;
			req->work.cancel_seq = cd->seq;
		}
		*out_bucket = hb;
		return req;
	}
	spin_unlock(&hb->lock);
	return NULL;
}
static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx,
					  struct io_cancel_data *cd,
					  struct io_hash_table *table,
					  struct io_hash_bucket **out_bucket)
{
	unsigned nr_buckets = 1U << table->hash_bits;
	struct io_kiocb *req;
	int i;

	*out_bucket = NULL;

	for (i = 0; i < nr_buckets; i++) {
		struct io_hash_bucket *hb = &table->hbs[i];

		spin_lock(&hb->lock);
		hlist_for_each_entry(req, &hb->list, hash_node) {
			if (io_cancel_req_match(req, cd)) {
				*out_bucket = hb;
				return req;
			}
		}
		spin_unlock(&hb->lock);
	}
	return NULL;
}
static int io_poll_disarm(struct io_kiocb *req)
{
	if (!req)
		return -ENOENT;
	if (!io_poll_get_ownership(req))
		return -EALREADY;
	io_poll_remove_entries(req);
	hash_del(&req->hash_node);
	return 0;
}
static int __io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
			    struct io_hash_table *table)
{
	struct io_hash_bucket *bucket;
	struct io_kiocb *req;

	if (cd->flags & (IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_OP |
			 IORING_ASYNC_CANCEL_ANY))
		req = io_poll_file_find(ctx, cd, table, &bucket);
	else
		req = io_poll_find(ctx, false, cd, table, &bucket);

	if (req)
		io_poll_cancel_req(req);
	if (bucket)
		spin_unlock(&bucket->lock);
	return req ? 0 : -ENOENT;
}
int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
		   unsigned issue_flags)
{
	int ret;

	ret = __io_poll_cancel(ctx, cd, &ctx->cancel_table);
	if (ret != -ENOENT)
		return ret;

	io_ring_submit_lock(ctx, issue_flags);
	ret = __io_poll_cancel(ctx, cd, &ctx->cancel_table_locked);
	io_ring_submit_unlock(ctx, issue_flags);
	return ret;
}
static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe,
				     unsigned int flags)
{
	u32 events;

	events = READ_ONCE(sqe->poll32_events);
#ifdef __BIG_ENDIAN
	events = swahw32(events);
#endif
	if (!(flags & IORING_POLL_ADD_MULTI))
		events |= EPOLLONESHOT;
	if (!(flags & IORING_POLL_ADD_LEVEL))
		events |= EPOLLET;
	return demangle_poll(events) |
		(events & (EPOLLEXCLUSIVE|EPOLLONESHOT|EPOLLET));
}
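
/*
 * Example of the translation above (illustrative): userspace sets
 * sqe->poll32_events = POLLIN and leaves IORING_POLL_ADD_MULTI and
 * IORING_POLL_ADD_LEVEL out of sqe->len, so the arming mask becomes
 * EPOLLIN | EPOLLONESHOT | EPOLLET: a one-shot, edge-triggered wait.
 */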
int io_poll_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_poll_update *upd = io_kiocb_to_cmd(req, struct io_poll_update);
	u32 flags;

	if (sqe->buf_index || sqe->splice_fd_in)
		return -EINVAL;
	flags = READ_ONCE(sqe->len);
	if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
		      IORING_POLL_ADD_MULTI))
		return -EINVAL;
	/* meaningless without update */
	if (flags == IORING_POLL_ADD_MULTI)
		return -EINVAL;

	upd->old_user_data = READ_ONCE(sqe->addr);
	upd->update_events = flags & IORING_POLL_UPDATE_EVENTS;
	upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;

	upd->new_user_data = READ_ONCE(sqe->off);
	if (!upd->update_user_data && upd->new_user_data)
		return -EINVAL;
	if (upd->update_events)
		upd->events = io_poll_parse_events(sqe, flags);
	else if (sqe->poll32_events)
		return -EINVAL;

	return 0;
}
int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_poll *poll = io_kiocb_to_cmd(req, struct io_poll);
	u32 flags;

	if (sqe->buf_index || sqe->off || sqe->addr)
		return -EINVAL;
	flags = READ_ONCE(sqe->len);
	if (flags & ~IORING_POLL_ADD_MULTI)
		return -EINVAL;
	if ((flags & IORING_POLL_ADD_MULTI) && (req->flags & REQ_F_CQE_SKIP))
		return -EINVAL;

	poll->events = io_poll_parse_events(sqe, flags);
	return 0;
}
int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_poll *poll = io_kiocb_to_cmd(req, struct io_poll);
	struct io_poll_table ipt;
	int ret;

	ipt.pt._qproc = io_poll_queue_proc;

	/*
	 * If sqpoll or single issuer, there is no contention for ->uring_lock
	 * and we'll end up holding it in tw handlers anyway.
	 */
	if (req->ctx->flags & (IORING_SETUP_SQPOLL|IORING_SETUP_SINGLE_ISSUER))
		req->flags |= REQ_F_HASH_LOCKED;

	ret = __io_arm_poll_handler(req, poll, &ipt, poll->events, issue_flags);
	if (ret > 0) {
		io_req_set_res(req, ipt.result_mask, 0);
		ret = IOU_OK;
	}
	return ret ?: IOU_ISSUE_SKIP_COMPLETE;
}
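
/*
 * Userspace view of the above, as a liburing-style sketch (not part of
 * this file; names from liburing, error handling omitted):
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_poll_add(sqe, fd, POLLIN);
 *	io_uring_sqe_set_data64(sqe, 0xcafe);	// becomes cqe->user_data
 *	io_uring_submit(&ring);
 *	// the CQE res carries the mangled poll mask once fd is readable
 */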
int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_poll_update *poll_update = io_kiocb_to_cmd(req, struct io_poll_update);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_cancel_data cd = { .ctx = ctx, .data = poll_update->old_user_data, };
	struct io_hash_bucket *bucket;
	struct io_kiocb *preq;
	int ret2, ret = 0;
	struct io_tw_state ts = { .locked = true };

	io_ring_submit_lock(ctx, issue_flags);
	preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table, &bucket);
	ret2 = io_poll_disarm(preq);
	if (bucket)
		spin_unlock(&bucket->lock);
	if (!ret2)
		goto found;
	if (ret2 != -ENOENT) {
		ret = ret2;
		goto out;
	}

	preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table_locked, &bucket);
	ret2 = io_poll_disarm(preq);
	if (bucket)
		spin_unlock(&bucket->lock);
	if (ret2) {
		ret = ret2;
		goto out;
	}

found:
	if (WARN_ON_ONCE(preq->opcode != IORING_OP_POLL_ADD)) {
		ret = -EFAULT;
		goto out;
	}

	if (poll_update->update_events || poll_update->update_user_data) {
		/* only mask one event flags, keep behavior flags */
		if (poll_update->update_events) {
			struct io_poll *poll = io_kiocb_to_cmd(preq, struct io_poll);

			poll->events &= ~0xffff;
			poll->events |= poll_update->events & 0xffff;
			poll->events |= IO_POLL_UNMASK;
		}
		if (poll_update->update_user_data)
			preq->cqe.user_data = poll_update->new_user_data;

		ret2 = io_poll_add(preq, issue_flags & ~IO_URING_F_UNLOCKED);
		/* successfully updated, don't complete poll request */
		if (!ret2 || ret2 == -EIOCBQUEUED)
			goto out;
	}

	req_set_fail(preq);
	io_req_set_res(preq, -ECANCELED, 0);
	io_req_task_complete(preq, &ts);
out:
	io_ring_submit_unlock(ctx, issue_flags);
	if (ret < 0) {
		req_set_fail(req);
		return ret;
	}
	/* complete update request, we're done with it */
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}
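
/*
 * Matching userspace sketch for the update path (liburing-style, not part
 * of this file): swap the events of the poll request armed earlier.
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_poll_update(sqe, 0xcafe, 0, POLLIN | POLLOUT,
 *				  IORING_POLL_UPDATE_EVENTS);
 *	io_uring_submit(&ring);
 */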
void io_apoll_cache_free(struct io_cache_entry *entry)
{
	kfree(container_of(entry, struct async_poll, cache));
}