// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "../kernel/futex/futex.h"
#include "io_uring.h"
#include "alloc_cache.h"
#include "futex.h"
19 struct futex_waitv __user
*uwaitv
;
21 unsigned long futex_val
;
22 unsigned long futex_mask
;
23 unsigned long futexv_owned
;
25 unsigned int futex_nr
;
29 struct io_futex_data
{
32 struct io_cache_entry cache
;
37 void io_futex_cache_init(struct io_ring_ctx
*ctx
)
39 io_alloc_cache_init(&ctx
->futex_cache
, IO_NODE_ALLOC_CACHE_MAX
,
40 sizeof(struct io_futex_data
));
43 static void io_futex_cache_entry_free(struct io_cache_entry
*entry
)
45 kfree(container_of(entry
, struct io_futex_data
, cache
));
48 void io_futex_cache_free(struct io_ring_ctx
*ctx
)
50 io_alloc_cache_free(&ctx
->futex_cache
, io_futex_cache_entry_free
);
53 static void __io_futex_complete(struct io_kiocb
*req
, struct io_tw_state
*ts
)
55 req
->async_data
= NULL
;
56 hlist_del_init(&req
->hash_node
);
57 io_req_task_complete(req
, ts
);
60 static void io_futex_complete(struct io_kiocb
*req
, struct io_tw_state
*ts
)
62 struct io_futex_data
*ifd
= req
->async_data
;
63 struct io_ring_ctx
*ctx
= req
->ctx
;
66 if (!io_alloc_cache_put(&ctx
->futex_cache
, &ifd
->cache
))
68 __io_futex_complete(req
, ts
);
71 static void io_futexv_complete(struct io_kiocb
*req
, struct io_tw_state
*ts
)
73 struct io_futex
*iof
= io_kiocb_to_cmd(req
, struct io_futex
);
74 struct futex_vector
*futexv
= req
->async_data
;
76 io_tw_lock(req
->ctx
, ts
);
78 if (!iof
->futexv_unqueued
) {
81 res
= futex_unqueue_multiple(futexv
, iof
->futex_nr
);
83 io_req_set_res(req
, res
, 0);
86 kfree(req
->async_data
);
87 req
->flags
&= ~REQ_F_ASYNC_DATA
;
88 __io_futex_complete(req
, ts
);
91 static bool io_futexv_claim(struct io_futex
*iof
)
93 if (test_bit(0, &iof
->futexv_owned
) ||
94 test_and_set_bit_lock(0, &iof
->futexv_owned
))
99 static bool __io_futex_cancel(struct io_ring_ctx
*ctx
, struct io_kiocb
*req
)
101 /* futex wake already done or in progress */
102 if (req
->opcode
== IORING_OP_FUTEX_WAIT
) {
103 struct io_futex_data
*ifd
= req
->async_data
;
105 if (!futex_unqueue(&ifd
->q
))
107 req
->io_task_work
.func
= io_futex_complete
;
109 struct io_futex
*iof
= io_kiocb_to_cmd(req
, struct io_futex
);
111 if (!io_futexv_claim(iof
))
113 req
->io_task_work
.func
= io_futexv_complete
;
116 hlist_del_init(&req
->hash_node
);
117 io_req_set_res(req
, -ECANCELED
, 0);
118 io_req_task_work_add(req
);
122 int io_futex_cancel(struct io_ring_ctx
*ctx
, struct io_cancel_data
*cd
,
123 unsigned int issue_flags
)
125 struct hlist_node
*tmp
;
126 struct io_kiocb
*req
;
129 if (cd
->flags
& (IORING_ASYNC_CANCEL_FD
|IORING_ASYNC_CANCEL_FD_FIXED
))
132 io_ring_submit_lock(ctx
, issue_flags
);
133 hlist_for_each_entry_safe(req
, tmp
, &ctx
->futex_list
, hash_node
) {
134 if (req
->cqe
.user_data
!= cd
->data
&&
135 !(cd
->flags
& IORING_ASYNC_CANCEL_ANY
))
137 if (__io_futex_cancel(ctx
, req
))
139 if (!(cd
->flags
& IORING_ASYNC_CANCEL_ALL
))
142 io_ring_submit_unlock(ctx
, issue_flags
);
150 bool io_futex_remove_all(struct io_ring_ctx
*ctx
, struct task_struct
*task
,
153 struct hlist_node
*tmp
;
154 struct io_kiocb
*req
;
157 lockdep_assert_held(&ctx
->uring_lock
);
159 hlist_for_each_entry_safe(req
, tmp
, &ctx
->futex_list
, hash_node
) {
160 if (!io_match_task_safe(req
, task
, cancel_all
))
162 hlist_del_init(&req
->hash_node
);
163 __io_futex_cancel(ctx
, req
);
170 int io_futex_prep(struct io_kiocb
*req
, const struct io_uring_sqe
*sqe
)
172 struct io_futex
*iof
= io_kiocb_to_cmd(req
, struct io_futex
);
175 if (unlikely(sqe
->len
|| sqe
->futex_flags
|| sqe
->buf_index
||
179 iof
->uaddr
= u64_to_user_ptr(READ_ONCE(sqe
->addr
));
180 iof
->futex_val
= READ_ONCE(sqe
->addr2
);
181 iof
->futex_mask
= READ_ONCE(sqe
->addr3
);
182 flags
= READ_ONCE(sqe
->fd
);
184 if (flags
& ~FUTEX2_VALID_MASK
)
187 iof
->futex_flags
= futex2_to_flags(flags
);
188 if (!futex_flags_valid(iof
->futex_flags
))
191 if (!futex_validate_input(iof
->futex_flags
, iof
->futex_val
) ||
192 !futex_validate_input(iof
->futex_flags
, iof
->futex_mask
))
198 static void io_futex_wakev_fn(struct wake_q_head
*wake_q
, struct futex_q
*q
)
200 struct io_kiocb
*req
= q
->wake_data
;
201 struct io_futex
*iof
= io_kiocb_to_cmd(req
, struct io_futex
);
203 if (!io_futexv_claim(iof
))
205 if (unlikely(!__futex_wake_mark(q
)))
208 io_req_set_res(req
, 0, 0);
209 req
->io_task_work
.func
= io_futexv_complete
;
210 io_req_task_work_add(req
);
213 int io_futexv_prep(struct io_kiocb
*req
, const struct io_uring_sqe
*sqe
)
215 struct io_futex
*iof
= io_kiocb_to_cmd(req
, struct io_futex
);
216 struct futex_vector
*futexv
;
219 /* No flags or mask supported for waitv */
220 if (unlikely(sqe
->fd
|| sqe
->buf_index
|| sqe
->file_index
||
221 sqe
->addr2
|| sqe
->futex_flags
|| sqe
->addr3
))
224 iof
->uaddr
= u64_to_user_ptr(READ_ONCE(sqe
->addr
));
225 iof
->futex_nr
= READ_ONCE(sqe
->len
);
226 if (!iof
->futex_nr
|| iof
->futex_nr
> FUTEX_WAITV_MAX
)
229 futexv
= kcalloc(iof
->futex_nr
, sizeof(*futexv
), GFP_KERNEL
);
233 ret
= futex_parse_waitv(futexv
, iof
->uwaitv
, iof
->futex_nr
,
234 io_futex_wakev_fn
, req
);
240 iof
->futexv_owned
= 0;
241 iof
->futexv_unqueued
= 0;
242 req
->flags
|= REQ_F_ASYNC_DATA
;
243 req
->async_data
= futexv
;
247 static void io_futex_wake_fn(struct wake_q_head
*wake_q
, struct futex_q
*q
)
249 struct io_futex_data
*ifd
= container_of(q
, struct io_futex_data
, q
);
250 struct io_kiocb
*req
= ifd
->req
;
252 if (unlikely(!__futex_wake_mark(q
)))
255 io_req_set_res(req
, 0, 0);
256 req
->io_task_work
.func
= io_futex_complete
;
257 io_req_task_work_add(req
);
260 static struct io_futex_data
*io_alloc_ifd(struct io_ring_ctx
*ctx
)
262 struct io_cache_entry
*entry
;
264 entry
= io_alloc_cache_get(&ctx
->futex_cache
);
266 return container_of(entry
, struct io_futex_data
, cache
);
268 return kmalloc(sizeof(struct io_futex_data
), GFP_NOWAIT
);
271 int io_futexv_wait(struct io_kiocb
*req
, unsigned int issue_flags
)
273 struct io_futex
*iof
= io_kiocb_to_cmd(req
, struct io_futex
);
274 struct futex_vector
*futexv
= req
->async_data
;
275 struct io_ring_ctx
*ctx
= req
->ctx
;
278 io_ring_submit_lock(ctx
, issue_flags
);
280 ret
= futex_wait_multiple_setup(futexv
, iof
->futex_nr
, &woken
);
283 * Error case, ret is < 0. Mark the request as failed.
285 if (unlikely(ret
< 0)) {
286 io_ring_submit_unlock(ctx
, issue_flags
);
288 io_req_set_res(req
, ret
, 0);
290 req
->async_data
= NULL
;
291 req
->flags
&= ~REQ_F_ASYNC_DATA
;
296 * 0 return means that we successfully setup the waiters, and that
297 * nobody triggered a wakeup while we were doing so. If the wakeup
298 * happened post setup, the task_work will be run post this issue and
299 * under the submission lock. 1 means We got woken while setting up,
300 * let that side do the completion. Note that
301 * futex_wait_multiple_setup() will have unqueued all the futexes in
302 * this case. Mark us as having done that already, since this is
303 * different from normal wakeup.
307 * If futex_wait_multiple_setup() returns 0 for a
308 * successful setup, then the task state will not be
309 * runnable. This is fine for the sync syscall, as
310 * it'll be blocking unless we already got one of the
311 * futexes woken, but it obviously won't work for an
312 * async invocation. Mark us runnable again.
314 __set_current_state(TASK_RUNNING
);
315 hlist_add_head(&req
->hash_node
, &ctx
->futex_list
);
317 iof
->futexv_unqueued
= 1;
319 io_req_set_res(req
, woken
, 0);
322 io_ring_submit_unlock(ctx
, issue_flags
);
323 return IOU_ISSUE_SKIP_COMPLETE
;
326 int io_futex_wait(struct io_kiocb
*req
, unsigned int issue_flags
)
328 struct io_futex
*iof
= io_kiocb_to_cmd(req
, struct io_futex
);
329 struct io_ring_ctx
*ctx
= req
->ctx
;
330 struct io_futex_data
*ifd
= NULL
;
331 struct futex_hash_bucket
*hb
;
334 if (!iof
->futex_mask
) {
339 io_ring_submit_lock(ctx
, issue_flags
);
340 ifd
= io_alloc_ifd(ctx
);
346 req
->async_data
= ifd
;
347 ifd
->q
= futex_q_init
;
348 ifd
->q
.bitset
= iof
->futex_mask
;
349 ifd
->q
.wake
= io_futex_wake_fn
;
352 ret
= futex_wait_setup(iof
->uaddr
, iof
->futex_val
, iof
->futex_flags
,
355 hlist_add_head(&req
->hash_node
, &ctx
->futex_list
);
356 io_ring_submit_unlock(ctx
, issue_flags
);
358 futex_queue(&ifd
->q
, hb
);
359 return IOU_ISSUE_SKIP_COMPLETE
;
363 io_ring_submit_unlock(ctx
, issue_flags
);
367 io_req_set_res(req
, ret
, 0);
372 int io_futex_wake(struct io_kiocb
*req
, unsigned int issue_flags
)
374 struct io_futex
*iof
= io_kiocb_to_cmd(req
, struct io_futex
);
378 * Strict flags - ensure that waking 0 futexes yields a 0 result.
379 * See commit 43adf8449510 ("futex: FLAGS_STRICT") for details.
381 ret
= futex_wake(iof
->uaddr
, FLAGS_STRICT
| iof
->futex_flags
,
382 iof
->futex_val
, iof
->futex_mask
);
385 io_req_set_res(req
, ret
, 0);