1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/kernel.h>
3 #include <linux/errno.h>
5 #include <linux/file.h>
6 #include <linux/io_uring.h>
8 #include <uapi/linux/io_uring.h>
10 #include "../kernel/futex/futex.h"
19 struct futex_waitv __user
*uwaitv
;
21 unsigned long futex_val
;
22 unsigned long futex_mask
;
23 unsigned long futexv_owned
;
25 unsigned int futex_nr
;
29 struct io_futex_data
{
32 struct io_cache_entry cache
;
37 void io_futex_cache_init(struct io_ring_ctx
*ctx
)
39 io_alloc_cache_init(&ctx
->futex_cache
, IO_NODE_ALLOC_CACHE_MAX
,
40 sizeof(struct io_futex_data
));
43 static void io_futex_cache_entry_free(struct io_cache_entry
*entry
)
45 kfree(container_of(entry
, struct io_futex_data
, cache
));
48 void io_futex_cache_free(struct io_ring_ctx
*ctx
)
50 io_alloc_cache_free(&ctx
->futex_cache
, io_futex_cache_entry_free
);
53 static void __io_futex_complete(struct io_kiocb
*req
, struct io_tw_state
*ts
)
55 req
->async_data
= NULL
;
56 hlist_del_init(&req
->hash_node
);
57 io_req_task_complete(req
, ts
);
60 static void io_futex_complete(struct io_kiocb
*req
, struct io_tw_state
*ts
)
62 struct io_futex_data
*ifd
= req
->async_data
;
63 struct io_ring_ctx
*ctx
= req
->ctx
;
66 if (!io_alloc_cache_put(&ctx
->futex_cache
, &ifd
->cache
))
68 __io_futex_complete(req
, ts
);
71 static void io_futexv_complete(struct io_kiocb
*req
, struct io_tw_state
*ts
)
73 struct io_futex
*iof
= io_kiocb_to_cmd(req
, struct io_futex
);
74 struct futex_vector
*futexv
= req
->async_data
;
76 io_tw_lock(req
->ctx
, ts
);
78 if (!iof
->futexv_unqueued
) {
81 res
= futex_unqueue_multiple(futexv
, iof
->futex_nr
);
83 io_req_set_res(req
, res
, 0);
86 kfree(req
->async_data
);
87 req
->flags
&= ~REQ_F_ASYNC_DATA
;
88 __io_futex_complete(req
, ts
);
91 static bool io_futexv_claim(struct io_futex
*iof
)
93 if (test_bit(0, &iof
->futexv_owned
) ||
94 test_and_set_bit_lock(0, &iof
->futexv_owned
))
99 static bool __io_futex_cancel(struct io_ring_ctx
*ctx
, struct io_kiocb
*req
)
101 /* futex wake already done or in progress */
102 if (req
->opcode
== IORING_OP_FUTEX_WAIT
) {
103 struct io_futex_data
*ifd
= req
->async_data
;
105 if (!futex_unqueue(&ifd
->q
))
107 req
->io_task_work
.func
= io_futex_complete
;
109 struct io_futex
*iof
= io_kiocb_to_cmd(req
, struct io_futex
);
111 if (!io_futexv_claim(iof
))
113 req
->io_task_work
.func
= io_futexv_complete
;
116 hlist_del_init(&req
->hash_node
);
117 io_req_set_res(req
, -ECANCELED
, 0);
118 io_req_task_work_add(req
);
122 int io_futex_cancel(struct io_ring_ctx
*ctx
, struct io_cancel_data
*cd
,
123 unsigned int issue_flags
)
125 struct hlist_node
*tmp
;
126 struct io_kiocb
*req
;
129 if (cd
->flags
& (IORING_ASYNC_CANCEL_FD
|IORING_ASYNC_CANCEL_FD_FIXED
))
132 io_ring_submit_lock(ctx
, issue_flags
);
133 hlist_for_each_entry_safe(req
, tmp
, &ctx
->futex_list
, hash_node
) {
134 if (req
->cqe
.user_data
!= cd
->data
&&
135 !(cd
->flags
& IORING_ASYNC_CANCEL_ANY
))
137 if (__io_futex_cancel(ctx
, req
))
139 if (!(cd
->flags
& IORING_ASYNC_CANCEL_ALL
))
142 io_ring_submit_unlock(ctx
, issue_flags
);
150 bool io_futex_remove_all(struct io_ring_ctx
*ctx
, struct task_struct
*task
,
153 struct hlist_node
*tmp
;
154 struct io_kiocb
*req
;
157 lockdep_assert_held(&ctx
->uring_lock
);
159 hlist_for_each_entry_safe(req
, tmp
, &ctx
->futex_list
, hash_node
) {
160 if (!io_match_task_safe(req
, task
, cancel_all
))
162 __io_futex_cancel(ctx
, req
);
169 int io_futex_prep(struct io_kiocb
*req
, const struct io_uring_sqe
*sqe
)
171 struct io_futex
*iof
= io_kiocb_to_cmd(req
, struct io_futex
);
174 if (unlikely(sqe
->len
|| sqe
->futex_flags
|| sqe
->buf_index
||
178 iof
->uaddr
= u64_to_user_ptr(READ_ONCE(sqe
->addr
));
179 iof
->futex_val
= READ_ONCE(sqe
->addr2
);
180 iof
->futex_mask
= READ_ONCE(sqe
->addr3
);
181 flags
= READ_ONCE(sqe
->fd
);
183 if (flags
& ~FUTEX2_VALID_MASK
)
186 iof
->futex_flags
= futex2_to_flags(flags
);
187 if (!futex_flags_valid(iof
->futex_flags
))
190 if (!futex_validate_input(iof
->futex_flags
, iof
->futex_val
) ||
191 !futex_validate_input(iof
->futex_flags
, iof
->futex_mask
))
197 static void io_futex_wakev_fn(struct wake_q_head
*wake_q
, struct futex_q
*q
)
199 struct io_kiocb
*req
= q
->wake_data
;
200 struct io_futex
*iof
= io_kiocb_to_cmd(req
, struct io_futex
);
202 if (!io_futexv_claim(iof
))
204 if (unlikely(!__futex_wake_mark(q
)))
207 io_req_set_res(req
, 0, 0);
208 req
->io_task_work
.func
= io_futexv_complete
;
209 io_req_task_work_add(req
);
212 int io_futexv_prep(struct io_kiocb
*req
, const struct io_uring_sqe
*sqe
)
214 struct io_futex
*iof
= io_kiocb_to_cmd(req
, struct io_futex
);
215 struct futex_vector
*futexv
;
218 /* No flags or mask supported for waitv */
219 if (unlikely(sqe
->fd
|| sqe
->buf_index
|| sqe
->file_index
||
220 sqe
->addr2
|| sqe
->futex_flags
|| sqe
->addr3
))
223 iof
->uaddr
= u64_to_user_ptr(READ_ONCE(sqe
->addr
));
224 iof
->futex_nr
= READ_ONCE(sqe
->len
);
225 if (!iof
->futex_nr
|| iof
->futex_nr
> FUTEX_WAITV_MAX
)
228 futexv
= kcalloc(iof
->futex_nr
, sizeof(*futexv
), GFP_KERNEL
);
232 ret
= futex_parse_waitv(futexv
, iof
->uwaitv
, iof
->futex_nr
,
233 io_futex_wakev_fn
, req
);
239 iof
->futexv_owned
= 0;
240 iof
->futexv_unqueued
= 0;
241 req
->flags
|= REQ_F_ASYNC_DATA
;
242 req
->async_data
= futexv
;
246 static void io_futex_wake_fn(struct wake_q_head
*wake_q
, struct futex_q
*q
)
248 struct io_futex_data
*ifd
= container_of(q
, struct io_futex_data
, q
);
249 struct io_kiocb
*req
= ifd
->req
;
251 if (unlikely(!__futex_wake_mark(q
)))
254 io_req_set_res(req
, 0, 0);
255 req
->io_task_work
.func
= io_futex_complete
;
256 io_req_task_work_add(req
);
259 static struct io_futex_data
*io_alloc_ifd(struct io_ring_ctx
*ctx
)
261 struct io_cache_entry
*entry
;
263 entry
= io_alloc_cache_get(&ctx
->futex_cache
);
265 return container_of(entry
, struct io_futex_data
, cache
);
267 return kmalloc(sizeof(struct io_futex_data
), GFP_NOWAIT
);
270 int io_futexv_wait(struct io_kiocb
*req
, unsigned int issue_flags
)
272 struct io_futex
*iof
= io_kiocb_to_cmd(req
, struct io_futex
);
273 struct futex_vector
*futexv
= req
->async_data
;
274 struct io_ring_ctx
*ctx
= req
->ctx
;
277 io_ring_submit_lock(ctx
, issue_flags
);
279 ret
= futex_wait_multiple_setup(futexv
, iof
->futex_nr
, &woken
);
282 * Error case, ret is < 0. Mark the request as failed.
284 if (unlikely(ret
< 0)) {
285 io_ring_submit_unlock(ctx
, issue_flags
);
287 io_req_set_res(req
, ret
, 0);
289 req
->async_data
= NULL
;
290 req
->flags
&= ~REQ_F_ASYNC_DATA
;
295 * 0 return means that we successfully setup the waiters, and that
296 * nobody triggered a wakeup while we were doing so. If the wakeup
297 * happened post setup, the task_work will be run post this issue and
298 * under the submission lock. 1 means we got woken while setting up,
299 * let that side do the completion. Note that
300 * futex_wait_multiple_setup() will have unqueued all the futexes in
301 * this case. Mark us as having done that already, since this is
302 * different from normal wakeup.
306 * If futex_wait_multiple_setup() returns 0 for a
307 * successful setup, then the task state will not be
308 * runnable. This is fine for the sync syscall, as
309 * it'll be blocking unless we already got one of the
310 * futexes woken, but it obviously won't work for an
311 * async invocation. Mark us runnable again.
313 __set_current_state(TASK_RUNNING
);
314 hlist_add_head(&req
->hash_node
, &ctx
->futex_list
);
316 iof
->futexv_unqueued
= 1;
318 io_req_set_res(req
, woken
, 0);
321 io_ring_submit_unlock(ctx
, issue_flags
);
322 return IOU_ISSUE_SKIP_COMPLETE
;
325 int io_futex_wait(struct io_kiocb
*req
, unsigned int issue_flags
)
327 struct io_futex
*iof
= io_kiocb_to_cmd(req
, struct io_futex
);
328 struct io_ring_ctx
*ctx
= req
->ctx
;
329 struct io_futex_data
*ifd
= NULL
;
330 struct futex_hash_bucket
*hb
;
333 if (!iof
->futex_mask
) {
338 io_ring_submit_lock(ctx
, issue_flags
);
339 ifd
= io_alloc_ifd(ctx
);
345 req
->async_data
= ifd
;
346 ifd
->q
= futex_q_init
;
347 ifd
->q
.bitset
= iof
->futex_mask
;
348 ifd
->q
.wake
= io_futex_wake_fn
;
351 ret
= futex_wait_setup(iof
->uaddr
, iof
->futex_val
, iof
->futex_flags
,
354 hlist_add_head(&req
->hash_node
, &ctx
->futex_list
);
355 io_ring_submit_unlock(ctx
, issue_flags
);
357 futex_queue(&ifd
->q
, hb
);
358 return IOU_ISSUE_SKIP_COMPLETE
;
362 io_ring_submit_unlock(ctx
, issue_flags
);
366 io_req_set_res(req
, ret
, 0);
371 int io_futex_wake(struct io_kiocb
*req
, unsigned int issue_flags
)
373 struct io_futex
*iof
= io_kiocb_to_cmd(req
, struct io_futex
);
377 * Strict flags - ensure that waking 0 futexes yields a 0 result.
378 * See commit 43adf8449510 ("futex: FLAGS_STRICT") for details.
380 ret
= futex_wake(iof
->uaddr
, FLAGS_STRICT
| iof
->futex_flags
,
381 iof
->futex_val
, iof
->futex_mask
);
384 io_req_set_res(req
, ret
, 0);