// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "rsrc.h"
#include "filetable.h"
#include "msg_ring.h"
/* All valid masks for MSG_RING */
#define IORING_MSG_RING_MASK		(IORING_MSG_RING_CQE_SKIP | \
					 IORING_MSG_RING_FLAGS_PASS)
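/*
 * IORING_OP_MSG_RING lets one io_uring message another ring: either post an
 * arbitrary CQE into the target CQ (IORING_MSG_DATA), or install a fixed
 * file from the source ring into the target ring's file table
 * (IORING_MSG_SEND_FD).
 *
 * A minimal userspace sketch of the data case, assuming liburing's
 * io_uring_prep_msg_ring() helper (illustrative only, not part of this
 * file):
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&src_ring);
 *
 *	// Target ring sees a CQE with res = 0x10, user_data = 0xcafe.
 *	io_uring_prep_msg_ring(sqe, target_ring_fd, 0x10, 0xcafe, 0);
 *	io_uring_submit(&src_ring);
 */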
struct io_msg {
	struct file			*file;
	struct file			*src_file;
	struct callback_head		tw;
	u64 user_data;
	u32 len;
	u32 cmd;
	u32 src_fd;
	union {
		u32 dst_fd;
		u32 cqe_flags;
	};
	u32 flags;
};
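/*
 * Helpers for taking the target ring's uring_lock while the source ring's
 * lock may already be held. See the ordering comment in io_double_lock_ctx().
 */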
static void io_double_unlock_ctx(struct io_ring_ctx *octx)
{
	mutex_unlock(&octx->uring_lock);
}
static int io_double_lock_ctx(struct io_ring_ctx *octx,
			      unsigned int issue_flags)
{
	/*
	 * To ensure proper ordering between the two ctxs, we can only
	 * attempt a trylock on the target. If that fails and we already have
	 * the source ctx lock, punt to io-wq.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		if (!mutex_trylock(&octx->uring_lock))
			return -EAGAIN;
		return 0;
	}
	mutex_lock(&octx->uring_lock);
	return 0;
}
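/*
 * Cleanup handler, invoked while the request still has REQ_F_NEED_CLEANUP
 * set: drop the file reference taken in io_msg_grab_file() when the send
 * never made it to the target ring.
 */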
void io_msg_ring_cleanup(struct io_kiocb *req)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

	if (WARN_ON_ONCE(!msg->src_file))
		return;

	fput(msg->src_file);
	msg->src_file = NULL;
}
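/*
 * Rings with ->task_complete set (e.g. IORING_SETUP_DEFER_TASKRUN) may only
 * post completions from their submitter task, so messaging such a ring from
 * any other task has to be bounced to that task via task_work.
 */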
static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
{
	if (!target_ctx->task_complete)
		return false;
	return current != target_ctx->submitter_task;
}
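/*
 * Queue @func as task_work on the target ring's submitter task, with
 * TWA_SIGNAL ensuring the task is woken to run it. On success the request
 * completes later from the callback, hence IOU_ISSUE_SKIP_COMPLETE.
 */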
static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func)
{
	struct io_ring_ctx *ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct task_struct *task = READ_ONCE(ctx->submitter_task);

	if (unlikely(!task))
		return -EOWNERDEAD;

	init_task_work(&msg->tw, func);
	if (task_work_add(ctx->submitter_task, &msg->tw, TWA_SIGNAL))
		return -EOWNERDEAD;

	return IOU_ISSUE_SKIP_COMPLETE;
}
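/*
 * task_work callback for IORING_MSG_DATA when the target ring needs remote
 * posting: runs in the context of the target's submitter task and posts the
 * CQE from there.
 */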
static void io_msg_tw_complete(struct callback_head *head)
{
	struct io_msg *msg = container_of(head, struct io_msg, tw);
	struct io_kiocb *req = cmd_to_io_kiocb(msg);
	struct io_ring_ctx *target_ctx = req->file->private_data;
	int ret = 0;

	if (current->flags & PF_EXITING) {
		ret = -EOWNERDEAD;
	} else {
		u32 flags = 0;

		if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
			flags = msg->cqe_flags;

		/*
		 * If the target ring is using IOPOLL mode, then we need to be
		 * holding the uring_lock for posting completions. Other ring
		 * types rely on the regular completion locking, which is
		 * handled while posting.
		 */
		if (target_ctx->flags & IORING_SETUP_IOPOLL)
			mutex_lock(&target_ctx->uring_lock);
		if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
			ret = -EOVERFLOW;
		if (target_ctx->flags & IORING_SETUP_IOPOLL)
			mutex_unlock(&target_ctx->uring_lock);
	}

	if (ret < 0)
		req_set_fail(req);
	io_req_queue_tw_complete(req, ret);
}
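/*
 * IORING_MSG_DATA: post a CQE carrying ->user_data and ->len into the
 * target ring's CQ, optionally passing ->cqe_flags through as well.
 */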
static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	u32 flags = 0;
	int ret;

	if (msg->src_fd || msg->flags & ~IORING_MSG_RING_FLAGS_PASS)
		return -EINVAL;
	if (!(msg->flags & IORING_MSG_RING_FLAGS_PASS) && msg->dst_fd)
		return -EINVAL;
	if (target_ctx->flags & IORING_SETUP_R_DISABLED)
		return -EBADFD;

	if (io_msg_need_remote(target_ctx))
		return io_msg_exec_remote(req, io_msg_tw_complete);

	if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
		flags = msg->cqe_flags;

	ret = -EOVERFLOW;
	if (target_ctx->flags & IORING_SETUP_IOPOLL) {
		if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
			return -EAGAIN;
		if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
			ret = 0;
		io_double_unlock_ctx(target_ctx);
	} else {
		if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
			ret = 0;
	}
	return ret;
}
static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct file *file = NULL;
	int idx = msg->src_fd;

	io_ring_submit_lock(ctx, issue_flags);
	if (likely(idx < ctx->nr_user_files)) {
		idx = array_index_nospec(idx, ctx->nr_user_files);
		file = io_file_from_index(&ctx->file_table, idx);
		if (file)
			get_file(file);
	}
	io_ring_submit_unlock(ctx, issue_flags);
	return file;
}
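/*
 * Install the grabbed file into the target ring's fixed file table at the
 * slot given by ->dst_fd, then post a CQE to notify the target unless
 * IORING_MSG_RING_CQE_SKIP was requested.
 */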
static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct file *src_file = msg->src_file;
	int ret;

	if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
		return -EAGAIN;

	ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd);
	if (ret < 0)
		goto out_unlock;

	msg->src_file = NULL;
	req->flags &= ~REQ_F_NEED_CLEANUP;

	if (msg->flags & IORING_MSG_RING_CQE_SKIP)
		goto out_unlock;
	/*
	 * If this fails, the target still received the file descriptor but
	 * wasn't notified of the fact. This means that if this request
	 * completes with -EOVERFLOW, then the sender must ensure that a
	 * later IORING_OP_MSG_RING delivers the message.
	 */
	if (!io_post_aux_cqe(target_ctx, msg->user_data, ret, 0))
		ret = -EOVERFLOW;
out_unlock:
	io_double_unlock_ctx(target_ctx);
	return ret;
}
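/*
 * task_work callback for IORING_MSG_SEND_FD when the target ring needs
 * remote posting: install the file from the target's submitter task.
 */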
static void io_msg_tw_fd_complete(struct callback_head *head)
{
	struct io_msg *msg = container_of(head, struct io_msg, tw);
	struct io_kiocb *req = cmd_to_io_kiocb(msg);
	int ret = -EOWNERDEAD;

	if (!(current->flags & PF_EXITING))
		ret = io_msg_install_complete(req, IO_URING_F_UNLOCKED);
	if (ret < 0)
		req_set_fail(req);
	io_req_queue_tw_complete(req, ret);
}
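/*
 * IORING_MSG_SEND_FD: pass a fixed file from the source ring to the target
 * ring. The file reference is grabbed on first issue and kept across an
 * -EAGAIN retry via ->src_file and REQ_F_NEED_CLEANUP.
 */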
static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct file *src_file = msg->src_file;

	if (msg->len)
		return -EINVAL;
	if (target_ctx == ctx)
		return -EINVAL;
	if (target_ctx->flags & IORING_SETUP_R_DISABLED)
		return -EBADFD;
	if (!src_file) {
		src_file = io_msg_grab_file(req, issue_flags);
		if (!src_file)
			return -EBADF;
		msg->src_file = src_file;
		req->flags |= REQ_F_NEED_CLEANUP;
	}

	if (io_msg_need_remote(target_ctx))
		return io_msg_exec_remote(req, io_msg_tw_fd_complete);
	return io_msg_install_complete(req, issue_flags);
}
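/*
 * Prepare an IORING_OP_MSG_RING request: decode the SQE fields into struct
 * io_msg and reject any flags outside IORING_MSG_RING_MASK.
 */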
int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

	if (unlikely(sqe->buf_index || sqe->personality))
		return -EINVAL;

	msg->src_file = NULL;
	msg->user_data = READ_ONCE(sqe->off);
	msg->len = READ_ONCE(sqe->len);
	msg->cmd = READ_ONCE(sqe->addr);
	msg->src_fd = READ_ONCE(sqe->addr3);
	msg->dst_fd = READ_ONCE(sqe->file_index);
	msg->flags = READ_ONCE(sqe->msg_ring_flags);
	if (msg->flags & ~IORING_MSG_RING_MASK)
		return -EINVAL;

	return 0;
}
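/*
 * Issue an IORING_OP_MSG_RING request: the request's file must itself be an
 * io_uring instance, and ->cmd selects the data or fd-passing path. -EAGAIN
 * and IOU_ISSUE_SKIP_COMPLETE are passed back untouched so the core can
 * retry or defer completion.
 */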
int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	int ret;

	ret = -EBADFD;
	if (!io_is_uring_fops(req->file))
		goto done;

	switch (msg->cmd) {
	case IORING_MSG_DATA:
		ret = io_msg_ring_data(req, issue_flags);
		break;
	case IORING_MSG_SEND_FD:
		ret = io_msg_send_fd(req, issue_flags);
		break;
	default:
		ret = -EINVAL;
		break;
	}

done:
	if (ret < 0) {
		if (ret == -EAGAIN || ret == IOU_ISSUE_SKIP_COMPLETE)
			return ret;
		req_set_fail(req);
	}
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}