/* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
struct io_buffer *kbuf;
- /*
- * stores buffer ID for ring provided buffers, valid IFF
- * REQ_F_BUFFER_RING is set.
- */
- struct io_buffer_list *buf_list;
-
struct io_rsrc_node *buf_node;
};
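/*
 * After this change the union above only carries the legacy kbuf pointer and
 * the registered buffer node; the ring provided buffer list now travels in
 * struct io_br_sel for the duration of a single issue attempt (see the
 * io_br_sel comment further down).
 */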
lockdep_assert_held(&req->ctx->uring_lock);
req_set_fail(req);
- io_req_set_res(req, res, io_put_kbuf(req, res, req->buf_list));
+ io_req_set_res(req, res, io_put_kbuf(req, res, NULL));
if (def->fail)
def->fail(req);
io_req_complete_defer(req);
switch (io_arm_poll_handler(req, 0)) {
case IO_APOLL_READY:
- io_kbuf_recycle(req, req->buf_list, 0);
+ io_kbuf_recycle(req, NULL, 0);
io_req_task_queue(req);
break;
case IO_APOLL_ABORTED:
- io_kbuf_recycle(req, req->buf_list, 0);
+ io_kbuf_recycle(req, NULL, 0);
io_queue_iowq(req);
break;
case IO_APOLL_OK:
if (*len == 0 || *len > buf->len)
*len = buf->len;
req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT;
- req->buf_list = bl;
req->buf_index = buf->bid;
+ sel.buf_list = bl;
sel.addr = u64_to_user_ptr(buf->addr);
if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) {
* the transfer completes (or if we get -EAGAIN and must poll or
* retry).
*/
- io_kbuf_commit(req, bl, *len, 1);
- req->buf_list = NULL;
+ io_kbuf_commit(req, sel.buf_list, *len, 1);
+ sel.buf_list = NULL;
}
return sel;
}
req->flags |= REQ_F_BL_EMPTY;
req->flags |= REQ_F_BUFFER_RING;
- req->buf_list = bl;
return iov - arg->iovs;
}
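/*
 * The peek side no longer publishes the list through the request; the
 * selection helpers below record it in sel->buf_list themselves, on the
 * understanding that the pointer is only valid while ->uring_lock stays held.
 */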
struct io_br_sel *sel, unsigned int issue_flags)
{
struct io_ring_ctx *ctx = req->ctx;
- struct io_buffer_list *bl;
int ret = -ENOENT;
io_ring_submit_lock(ctx, issue_flags);
- bl = io_buffer_get_list(ctx, arg->buf_group);
- if (unlikely(!bl))
+ sel->buf_list = io_buffer_get_list(ctx, arg->buf_group);
+ if (unlikely(!sel->buf_list))
goto out_unlock;
- if (bl->flags & IOBL_BUF_RING) {
- ret = io_ring_buffers_peek(req, arg, bl);
+ if (sel->buf_list->flags & IOBL_BUF_RING) {
+ ret = io_ring_buffers_peek(req, arg, sel->buf_list);
/*
* Don't recycle these buffers if we need to go through poll.
* Nobody else can use them anyway, and holding on to provided
*/
if (ret > 0) {
req->flags |= REQ_F_BUFFERS_COMMIT | REQ_F_BL_NO_RECYCLE;
- io_kbuf_commit(req, bl, arg->out_len, ret);
+ io_kbuf_commit(req, sel->buf_list, arg->out_len, ret);
}
} else {
- ret = io_provided_buffers_select(req, &arg->out_len, bl, arg->iovs);
+ ret = io_provided_buffers_select(req, &arg->out_len, sel->buf_list, arg->iovs);
}
out_unlock:
- io_ring_submit_unlock(ctx, issue_flags);
+ if (issue_flags & IO_URING_F_UNLOCKED) {
+ sel->buf_list = NULL;
+ mutex_unlock(&ctx->uring_lock);
+ }
return ret;
}
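/*
 * For context, the open-coded unlock above replaces a plain
 * io_ring_submit_unlock() call, which (roughly) does:
 *
 *	if (issue_flags & IO_URING_F_UNLOCKED)
 *		mutex_unlock(&ctx->uring_lock);
 *
 * It is open-coded so that sel->buf_list is cleared in exactly the case
 * where the lock is dropped: once ->uring_lock is released the buffer list
 * may be unregistered, so the pointer must not be handed back to the caller.
 */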
ret = io_ring_buffers_peek(req, arg, bl);
if (ret > 0)
req->flags |= REQ_F_BUFFERS_COMMIT;
+ sel->buf_list = bl;
return ret;
}
/* don't support multiple buffer selections for legacy */
+ sel->buf_list = NULL;
return io_provided_buffers_select(req, &arg->max_len, bl, arg->iovs);
}
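/*
 * The legacy path can leave sel->buf_list at NULL: a classic provided buffer
 * is unlinked from its list at selection time and stashed in req->kbuf under
 * REQ_F_BUFFER_SELECTED, so the completion side never needs the list
 * pointer. Only ring provided buffers carry it in sel->buf_list.
 */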
};
/*
- * Return value from io_buffer_list selection. Just returns the error or
- * user address for now, will be extended to return the buffer list in the
- * future.
+ * Return value from io_buffer_list selection, to avoid stashing it in
+ * struct io_kiocb. For legacy/classic provided buffers, keeping a reference
+ * across execution contexts is fine. But for ring provided buffers, the
+ * list may go away as soon as ->uring_lock is dropped. As the io_kiocb
+ * persists, it's better to just keep the buffer list local for those cases.
*/
struct io_br_sel {
+ struct io_buffer_list *buf_list;
/*
* Some selection parts return the user address, others return an error.
*/
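/*
 * Filled out, the selection result used throughout these hunks looks roughly
 * like this (sketch; addr and val are the members the callers reference):
 *
 *	struct io_br_sel {
 *		struct io_buffer_list *buf_list;
 *		union {
 *			void __user *addr;
 *			ssize_t val;
 *		};
 *	};
 *
 * buf_list is only valid while ->uring_lock is held; addr carries the
 * selected buffer's user address and val an error or byte count.
 */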
static inline bool io_kbuf_recycle_ring(struct io_kiocb *req,
struct io_buffer_list *bl)
{
- /*
- * We don't need to recycle for REQ_F_BUFFER_RING, we can just clear
- * the flag and hence ensure that bl->head doesn't get incremented.
- * If the tail has already been incremented, hang on to it.
- * The exception is partial io, that case we should increment bl->head
- * to monopolize the buffer.
- */
if (bl) {
req->flags &= ~(REQ_F_BUFFER_RING|REQ_F_BUFFERS_COMMIT);
return true;
if (req->opcode == IORING_OP_SENDMSG)
return -EINVAL;
sr->msg_flags |= MSG_WAITALL;
- req->buf_list = NULL;
req->flags |= REQ_F_MULTISHOT;
}
unsigned int cflags;
if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
- cflags = io_put_kbuf(req, sel->val, req->buf_list);
+ cflags = io_put_kbuf(req, sel->val, sel->buf_list);
goto finish;
}
- cflags = io_put_kbufs(req, sel->val, req->buf_list, io_bundle_nbufs(kmsg, sel->val));
+ cflags = io_put_kbufs(req, sel->val, sel->buf_list, io_bundle_nbufs(kmsg, sel->val));
if (bundle_finished || req->flags & REQ_F_BL_EMPTY)
goto finish;
flags |= MSG_DONTWAIT;
retry_bundle:
+ sel.buf_list = NULL;
if (io_do_buffer_select(req)) {
ret = io_send_select_buffer(req, issue_flags, &sel, kmsg);
if (ret)
sr->len -= ret;
sr->buf += ret;
sr->done_io += ret;
- return io_net_kbuf_recyle(req, req->buf_list, kmsg, ret);
+ return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
}
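	/*
	 * sel.buf_list is reset at the retry label above (and at the
	 * retry_multishot labels below) because each pass may select a fresh
	 * buffer under ->uring_lock; a pointer left over from an earlier pass
	 * must not leak into the next completion or recycle.
	 */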
if (ret == -ERESTARTSYS)
ret = -EINTR;
req->flags |= REQ_F_NOWAIT;
if (sr->msg_flags & MSG_ERRQUEUE)
req->flags |= REQ_F_CLEAR_POLLIN;
- if (req->flags & REQ_F_BUFFER_SELECT) {
- /*
- * Store the buffer group for this multishot receive separately,
- * as if we end up doing an io-wq based issue that selects a
- * buffer, it has to be committed immediately and that will
- * clear ->buf_list. This means we lose the link to the buffer
- * list, and the eventual buffer put on completion then cannot
- * restore it.
- */
+ if (req->flags & REQ_F_BUFFER_SELECT)
sr->buf_group = req->buf_index;
- req->buf_list = NULL;
- }
sr->mshot_total_len = sr->mshot_len = 0;
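	/*
	 * With no cached list pointer in the request, sr->buf_group saved
	 * above is what lets each issue re-resolve the buffer list by group
	 * ID; req->buf_index itself is reused for the selected buffer ID once
	 * a ring buffer has been picked (see the req->buf_index = buf->bid
	 * assignment above).
	 */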
if (sr->flags & IORING_RECV_MULTISHOT) {
if (!(req->flags & REQ_F_BUFFER_SELECT))
if (sr->flags & IORING_RECVSEND_BUNDLE) {
size_t this_ret = sel->val - sr->done_io;
- cflags |= io_put_kbufs(req, this_ret, req->buf_list, io_bundle_nbufs(kmsg, this_ret));
+ cflags |= io_put_kbufs(req, this_ret, sel->buf_list, io_bundle_nbufs(kmsg, this_ret));
if (sr->flags & IORING_RECV_RETRY)
cflags = req->cqe.flags | (cflags & CQE_F_MASK);
if (sr->mshot_len && sel->val >= sr->mshot_len)
return false;
}
} else {
- cflags |= io_put_kbuf(req, sel->val, req->buf_list);
+ cflags |= io_put_kbuf(req, sel->val, sel->buf_list);
}
/*
flags |= MSG_DONTWAIT;
retry_multishot:
+ sel.buf_list = NULL;
if (io_do_buffer_select(req)) {
size_t len = sr->len;
if (req->flags & REQ_F_APOLL_MULTISHOT) {
ret = io_recvmsg_prep_multishot(kmsg, sr, &sel.addr, &len);
if (ret) {
- io_kbuf_recycle(req, req->buf_list, issue_flags);
+ io_kbuf_recycle(req, sel.buf_list, issue_flags);
return ret;
}
}
if (ret < min_ret) {
if (ret == -EAGAIN && force_nonblock) {
- if (issue_flags & IO_URING_F_MULTISHOT)
- io_kbuf_recycle(req, req->buf_list, issue_flags);
-
+ io_kbuf_recycle(req, sel.buf_list, issue_flags);
return IOU_RETRY;
}
if (ret > 0 && io_net_retry(sock, flags)) {
sr->done_io += ret;
- return io_net_kbuf_recyle(req, req->buf_list, kmsg, ret);
+ return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
}
if (ret == -ERESTARTSYS)
ret = -EINTR;
else if (sr->done_io)
ret = sr->done_io;
else
- io_kbuf_recycle(req, req->buf_list, issue_flags);
+ io_kbuf_recycle(req, sel.buf_list, issue_flags);
sel.val = ret;
if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
struct io_async_msghdr *kmsg = req->async_data;
- struct io_br_sel sel = { };
+ struct io_br_sel sel;
struct socket *sock;
unsigned flags;
int ret, min_ret = 0;
flags |= MSG_DONTWAIT;
retry_multishot:
+ sel.buf_list = NULL;
if (io_do_buffer_select(req)) {
sel.val = sr->len;
ret = io_recv_buf_select(req, kmsg, &sel, issue_flags);
ret = sock_recvmsg(sock, &kmsg->msg, flags);
if (ret < min_ret) {
if (ret == -EAGAIN && force_nonblock) {
- if (issue_flags & IO_URING_F_MULTISHOT)
- io_kbuf_recycle(req, req->buf_list, issue_flags);
-
+ io_kbuf_recycle(req, sel.buf_list, issue_flags);
return IOU_RETRY;
}
if (ret > 0 && io_net_retry(sock, flags)) {
sr->len -= ret;
sr->buf += ret;
sr->done_io += ret;
- return io_net_kbuf_recyle(req, req->buf_list, kmsg, ret);
+ return io_net_kbuf_recyle(req, sel.buf_list, kmsg, ret);
}
if (ret == -ERESTARTSYS)
ret = -EINTR;
else if (sr->done_io)
ret = sr->done_io;
else
- io_kbuf_recycle(req, req->buf_list, issue_flags);
+ io_kbuf_recycle(req, sel.buf_list, issue_flags);
sel.val = ret;
if (!io_recv_finish(req, kmsg, &sel, mshot_finished, issue_flags))
ret = io_poll_check_events(req, tw);
if (ret == IOU_POLL_NO_ACTION) {
- io_kbuf_recycle(req, req->buf_list, 0);
+ io_kbuf_recycle(req, NULL, 0);
return;
} else if (ret == IOU_POLL_REQUEUE) {
- io_kbuf_recycle(req, req->buf_list, 0);
+ io_kbuf_recycle(req, NULL, 0);
__io_poll_execute(req, 0);
return;
}
req->flags |= REQ_F_POLLED;
ipt.pt._qproc = io_async_queue_proc;
- io_kbuf_recycle(req, req->buf_list, issue_flags);
+ io_kbuf_recycle(req, NULL, issue_flags);
ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, issue_flags);
if (ret)
io_req_io_end(req);
if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))
- req->cqe.flags |= io_put_kbuf(req, req->cqe.res, req->buf_list);
+ req->cqe.flags |= io_put_kbuf(req, req->cqe.res, NULL);
io_req_rw_cleanup(req, 0);
io_req_task_complete(req, tw);
}
static int kiocb_done(struct io_kiocb *req, ssize_t ret,
- unsigned int issue_flags)
+ struct io_br_sel *sel, unsigned int issue_flags)
{
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
unsigned final_ret = io_fixup_rw_res(req, ret);
* from the submission path.
*/
io_req_io_end(req);
- io_req_set_res(req, final_ret, io_put_kbuf(req, ret, req->buf_list));
+ /* callers without buffer selection pass a NULL sel */
+ io_req_set_res(req, final_ret, sel ? io_put_kbuf(req, ret, sel->buf_list) : 0);
io_req_rw_cleanup(req, issue_flags);
return IOU_COMPLETE;
} else {
ret = __io_read(req, &sel, issue_flags);
if (ret >= 0)
- return kiocb_done(req, ret, issue_flags);
+ return kiocb_done(req, ret, &sel, issue_flags);
if (req->flags & REQ_F_BUFFERS_COMMIT)
- io_kbuf_recycle(req, req->buf_list, issue_flags);
+ io_kbuf_recycle(req, sel.buf_list, issue_flags);
return ret;
}
* Reset rw->len to 0 again to avoid clamping future mshot
* reads, in case the buffer size varies.
*/
- if (io_kbuf_recycle(req, req->buf_list, issue_flags))
+ if (io_kbuf_recycle(req, sel.buf_list, issue_flags))
rw->len = 0;
return IOU_RETRY;
} else if (ret <= 0) {
- io_kbuf_recycle(req, req->buf_list, issue_flags);
+ io_kbuf_recycle(req, sel.buf_list, issue_flags);
if (ret < 0)
req_set_fail(req);
} else if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
- cflags = io_put_kbuf(req, ret, req->buf_list);
+ cflags = io_put_kbuf(req, ret, sel.buf_list);
} else {
/*
* Any successful return value will keep the multishot read
* we fail to post a CQE, or multishot is no longer set, then
* jump to the termination path. This request is then done.
*/
- cflags = io_put_kbuf(req, ret, req->buf_list);
+ cflags = io_put_kbuf(req, ret, sel.buf_list);
rw->len = 0; /* similarly to above, reset len to 0 */
if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
return -EAGAIN;
}
done:
- return kiocb_done(req, ret2, issue_flags);
+ return kiocb_done(req, ret2, NULL, issue_flags);
} else {
ret_eagain:
iov_iter_restore(&io->iter, &io->iter_state);
if (!smp_load_acquire(&req->iopoll_completed))
break;
nr_events++;
- req->cqe.flags = io_put_kbuf(req, req->cqe.res, req->buf_list);
+ req->cqe.flags = io_put_kbuf(req, req->cqe.res, NULL);
if (req->opcode != IORING_OP_URING_CMD)
io_req_rw_cleanup(req, 0);
}