// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "rsrc.h"
#include "filetable.h"
#include "msg_ring.h"
/* All valid masks for MSG_RING */
#define IORING_MSG_RING_MASK		(IORING_MSG_RING_CQE_SKIP | \
					 IORING_MSG_RING_FLAGS_PASS)
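
/*
 * Usage sketch (illustrative only, assuming liburing): userspace targets
 * another ring by fd, e.g. to wake it or hand it work:
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&src_ring);
 *
 *	io_uring_prep_msg_ring(sqe, target_ring_fd, 0, 0x1234, 0);
 *	io_uring_submit(&src_ring);
 *
 * The target ring then observes a CQE with user_data == 0x1234, without
 * any shared memory channel between the two rings.
 */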
struct io_msg {
	struct file			*file;
	struct file			*src_file;
	struct callback_head		tw;
	u64 user_data;
	u32 len;
	u32 cmd;
	u32 src_fd;
	union {
		u32 dst_fd;
		u32 cqe_flags;
	};
	u32 flags;
};
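
/*
 * Locking helpers for operating on two rings at once. There is no global
 * ordering between two arbitrary ring mutexes, so the target lock is only
 * ever trylocked while the source lock is held (see below).
 */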
static void io_double_unlock_ctx(struct io_ring_ctx *octx)
{
	mutex_unlock(&octx->uring_lock);
}
static int io_double_lock_ctx(struct io_ring_ctx *octx,
			      unsigned int issue_flags)
{
	/*
	 * To ensure proper ordering between the two ctxs, we can only
	 * attempt a trylock on the target. If that fails and we already have
	 * the source ctx lock, punt to io-wq.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		if (!mutex_trylock(&octx->uring_lock))
			return -EAGAIN;
		return 0;
	}
	mutex_lock(&octx->uring_lock);
	return 0;
}
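
/* Called on request teardown: drop a file grabbed for SEND_FD but never installed. */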
void io_msg_ring_cleanup(struct io_kiocb *req)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

	if (WARN_ON_ONCE(!msg->src_file))
		return;

	fput(msg->src_file);
	msg->src_file = NULL;
}
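
/*
 * Rings with task_complete set (e.g. IORING_SETUP_DEFER_TASKRUN) only allow
 * the submitter task to post completions, so any other task must bounce the
 * posting over to it.
 */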
static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
{
	if (!target_ctx->task_complete)
		return false;
	return current != target_ctx->submitter_task;
}
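
/*
 * Punt the request to the target ring's submitter task via task_work;
 * func then runs in that task's context and does the actual completion.
 */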
static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func)
{
	struct io_ring_ctx *ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct task_struct *task = READ_ONCE(ctx->submitter_task);

	if (unlikely(!task))
		return -EOWNERDEAD;

	init_task_work(&msg->tw, func);
	if (task_work_add(ctx->submitter_task, &msg->tw, TWA_SIGNAL))
		return -EOWNERDEAD;

	return IOU_ISSUE_SKIP_COMPLETE;
}
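
/*
 * task_work callback for IORING_MSG_DATA when the CQE has to be posted
 * from the target ring's submitter task.
 */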
static void io_msg_tw_complete(struct callback_head *head)
{
	struct io_msg *msg = container_of(head, struct io_msg, tw);
	struct io_kiocb *req = cmd_to_io_kiocb(msg);
	struct io_ring_ctx *target_ctx = req->file->private_data;
	int ret = 0;

	if (current->flags & PF_EXITING) {
		ret = -EOWNERDEAD;
	} else {
		u32 flags = 0;

		if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
			flags = msg->cqe_flags;

		/*
		 * If the target ring is using IOPOLL mode, then we need to be
		 * holding the uring_lock for posting completions. Other ring
		 * types rely on the regular completion locking, which is
		 * handled while posting.
		 */
		if (target_ctx->flags & IORING_SETUP_IOPOLL)
			mutex_lock(&target_ctx->uring_lock);
		if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
			ret = -EOVERFLOW;
		if (target_ctx->flags & IORING_SETUP_IOPOLL)
			mutex_unlock(&target_ctx->uring_lock);
	}

	if (ret < 0)
		req_set_fail(req);
	io_req_queue_tw_complete(req, ret);
}
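
/*
 * IORING_MSG_DATA: post a CQE with the sender-chosen user_data/len (and,
 * with IORING_MSG_RING_FLAGS_PASS, cqe_flags) to the target ring.
 */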
static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	u32 flags = 0;
	int ret;

	if (msg->src_fd || msg->flags & ~IORING_MSG_RING_FLAGS_PASS)
		return -EINVAL;
	if (!(msg->flags & IORING_MSG_RING_FLAGS_PASS) && msg->dst_fd)
		return -EINVAL;
	if (target_ctx->flags & IORING_SETUP_R_DISABLED)
		return -EBADFD;

	if (io_msg_need_remote(target_ctx))
		return io_msg_exec_remote(req, io_msg_tw_complete);

	if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
		flags = msg->cqe_flags;

	ret = -EOVERFLOW;
	if (target_ctx->flags & IORING_SETUP_IOPOLL) {
		if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
			return -EAGAIN;
		if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
			ret = 0;
		io_double_unlock_ctx(target_ctx);
	} else {
		if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
			ret = 0;
	}
	return ret;
}
static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct file *file = NULL;
	unsigned long file_ptr;
	int idx = msg->src_fd;

	io_ring_submit_lock(ctx, issue_flags);
	if (likely(idx < ctx->nr_user_files)) {
		idx = array_index_nospec(idx, ctx->nr_user_files);
		file_ptr = io_fixed_file_slot(&ctx->file_table, idx)->file_ptr;
		file = (struct file *) (file_ptr & FFS_MASK);
		if (file)
			get_file(file);
	}
	io_ring_submit_unlock(ctx, issue_flags);
	return file;
}
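
/*
 * Install the grabbed file into the target's fixed file table and, unless
 * IORING_MSG_RING_CQE_SKIP is set, post a CQE there whose res is the
 * destination slot.
 */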
static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct file *src_file = msg->src_file;
	int ret;

	if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
		return -EAGAIN;

	ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd);
	if (ret < 0)
		goto out_unlock;

	msg->src_file = NULL;
	req->flags &= ~REQ_F_NEED_CLEANUP;

	if (msg->flags & IORING_MSG_RING_CQE_SKIP)
		goto out_unlock;
	/*
	 * If this fails, the target still received the file descriptor but
	 * wasn't notified of the fact. This means that if this request
	 * completes with -EOVERFLOW, then the sender must ensure that a
	 * later IORING_OP_MSG_RING delivers the message.
	 */
	if (!io_post_aux_cqe(target_ctx, msg->user_data, ret, 0))
		ret = -EOVERFLOW;
out_unlock:
	io_double_unlock_ctx(target_ctx);
	return ret;
}
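
/*
 * task_work callback for IORING_MSG_SEND_FD when the install has to run in
 * the target ring's submitter task.
 */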
static void io_msg_tw_fd_complete(struct callback_head *head)
{
	struct io_msg *msg = container_of(head, struct io_msg, tw);
	struct io_kiocb *req = cmd_to_io_kiocb(msg);
	int ret = -EOWNERDEAD;

	if (!(current->flags & PF_EXITING))
		ret = io_msg_install_complete(req, IO_URING_F_UNLOCKED);
	if (ret < 0)
		req_set_fail(req);
	io_req_queue_tw_complete(req, ret);
}
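
/*
 * IORING_MSG_SEND_FD: pass a fixed file from the sender's table into the
 * target ring's table. REQ_F_NEED_CLEANUP guarantees the grabbed reference
 * is dropped if the install never happens.
 */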
static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct file *src_file = msg->src_file;

	if (msg->len)
		return -EINVAL;
	if (target_ctx == ctx)
		return -EINVAL;
	if (target_ctx->flags & IORING_SETUP_R_DISABLED)
		return -EBADFD;
	if (!src_file) {
		src_file = io_msg_grab_file(req, issue_flags);
		if (!src_file)
			return -EBADF;
		msg->src_file = src_file;
		req->flags |= REQ_F_NEED_CLEANUP;
	}

	if (io_msg_need_remote(target_ctx))
		return io_msg_exec_remote(req, io_msg_tw_fd_complete);
	return io_msg_install_complete(req, issue_flags);
}
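
/*
 * Decode the SQE: user_data destined for the target CQE rides in sqe->off,
 * the source/destination fds in sqe->addr3/sqe->file_index.
 */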
int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

	if (unlikely(sqe->buf_index || sqe->personality))
		return -EINVAL;

	msg->src_file = NULL;
	msg->user_data = READ_ONCE(sqe->off);
	msg->len = READ_ONCE(sqe->len);
	msg->cmd = READ_ONCE(sqe->addr);
	msg->src_fd = READ_ONCE(sqe->addr3);
	msg->dst_fd = READ_ONCE(sqe->file_index);
	msg->flags = READ_ONCE(sqe->msg_ring_flags);
	if (msg->flags & ~IORING_MSG_RING_MASK)
		return -EINVAL;

	return 0;
}
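
/*
 * Issue entry point: dispatch on msg->cmd. -EAGAIN and
 * IOU_ISSUE_SKIP_COMPLETE are returned as-is so the core retries or waits
 * for the punted task_work rather than completing the request here.
 */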
int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	int ret;

	ret = -EBADFD;
	if (!io_is_uring_fops(req->file))
		goto done;

	switch (msg->cmd) {
	case IORING_MSG_DATA:
		ret = io_msg_ring_data(req, issue_flags);
		break;
	case IORING_MSG_SEND_FD:
		ret = io_msg_send_fd(req, issue_flags);
		break;
	default:
		ret = -EINVAL;
		break;
	}

done:
	if (ret < 0) {
		if (ret == -EAGAIN || ret == IOU_ISSUE_SKIP_COMPLETE)
			return ret;
		req_set_fail(req);
	}
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}