From: Greg Kroah-Hartman
Date: Tue, 12 Apr 2022 05:13:08 +0000 (+0200)
Subject: 5.17-stable patches
X-Git-Tag: v4.9.310~4
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=fc6346c5d22641367403857e61fbfdf7b98dff44;p=thirdparty%2Fkernel%2Fstable-queue.git

5.17-stable patches

added patches:
	io_uring-defer-file-assignment.patch
	io_uring-drop-the-old-style-inflight-file-tracking.patch
	io_uring-move-read-write-file-prep-state-into-actual-opcode-handler.patch
	io_uring-propagate-issue_flags-state-down-to-file-assignment.patch

---

diff --git a/queue-5.17/io_uring-defer-file-assignment.patch b/queue-5.17/io_uring-defer-file-assignment.patch
new file mode 100644
index 00000000000..19470eb761d
--- /dev/null
+++ b/queue-5.17/io_uring-defer-file-assignment.patch
@@ -0,0 +1,147 @@
+From foo@baz Tue Apr 12 07:11:16 AM CEST 2022
+From: Jens Axboe
+Date: Tue, 29 Mar 2022 10:10:08 -0600
+Subject: io_uring: defer file assignment
+
+From: Jens Axboe
+
+commit 6bf9c47a398911e0ab920e362115153596c80432 upstream.
+
+If an application uses direct open or accept, it knows in advance what
+direct descriptor value it will get as it picks it itself. This allows
+combined requests such as:
+
+sqe = io_uring_get_sqe(ring);
+io_uring_prep_openat_direct(sqe, ..., file_slot);
+sqe->flags |= IOSQE_IO_LINK | IOSQE_CQE_SKIP_SUCCESS;
+
+sqe = io_uring_get_sqe(ring);
+io_uring_prep_read(sqe,file_slot, buf, buf_size, 0);
+sqe->flags |= IOSQE_FIXED_FILE;
+
+io_uring_submit(ring);
+
+where we prepare both a file open and read, and only get a completion
+event for the read when both have completed successfully.
+
+Currently links are fully prepared before the head is issued, but that
+fails if the dependent link needs a file assigned that isn't valid until
+the head has completed.
+
+Conversely, if the same chain is performed but the fixed file slot is
+already valid, then we would be unexpectedly returning data from the
+old file slot rather than the newly opened one. Make sure we're
+consistent here.
+
+Allow deferral of file setup, which makes this documented case work.
+ +Cc: stable@vger.kernel.org # v5.15+ +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/io-wq.h | 1 + + fs/io_uring.c | 39 +++++++++++++++++++++++++++++---------- + 2 files changed, 30 insertions(+), 10 deletions(-) + +--- a/fs/io-wq.h ++++ b/fs/io-wq.h +@@ -155,6 +155,7 @@ struct io_wq_work_node *wq_stack_extract + struct io_wq_work { + struct io_wq_work_node list; + unsigned flags; ++ int fd; + }; + + static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -6745,6 +6745,23 @@ static void io_clean_op(struct io_kiocb + req->flags &= ~IO_REQ_CLEAN_FLAGS; + } + ++static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags) ++{ ++ if (req->file || !io_op_defs[req->opcode].needs_file) ++ return true; ++ ++ if (req->flags & REQ_F_FIXED_FILE) ++ req->file = io_file_get_fixed(req, req->work.fd, issue_flags); ++ else ++ req->file = io_file_get_normal(req, req->work.fd); ++ if (req->file) ++ return true; ++ ++ req_set_fail(req); ++ req->result = -EBADF; ++ return false; ++} ++ + static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) + { + const struct cred *creds = NULL; +@@ -6755,6 +6772,8 @@ static int io_issue_sqe(struct io_kiocb + + if (!io_op_defs[req->opcode].audit_skip) + audit_uring_entry(req->opcode); ++ if (unlikely(!io_assign_file(req, issue_flags))) ++ return -EBADF; + + switch (req->opcode) { + case IORING_OP_NOP: +@@ -6896,10 +6915,11 @@ static struct io_wq_work *io_wq_free_wor + static void io_wq_submit_work(struct io_wq_work *work) + { + struct io_kiocb *req = container_of(work, struct io_kiocb, work); ++ const struct io_op_def *def = &io_op_defs[req->opcode]; + unsigned int issue_flags = IO_URING_F_UNLOCKED; + bool needs_poll = false; + struct io_kiocb *timeout; +- int ret = 0; ++ int ret = 0, err = -ECANCELED; + + /* one will be dropped by ->io_free_work() after returning to io-wq */ + if (!(req->flags & REQ_F_REFCOUNT)) +@@ -6911,14 +6931,18 @@ static void io_wq_submit_work(struct io_ + if (timeout) + io_queue_linked_timeout(timeout); + ++ if (!io_assign_file(req, issue_flags)) { ++ err = -EBADF; ++ work->flags |= IO_WQ_WORK_CANCEL; ++ } ++ + /* either cancelled or io-wq is dying, so don't touch tctx->iowq */ + if (work->flags & IO_WQ_WORK_CANCEL) { +- io_req_task_queue_fail(req, -ECANCELED); ++ io_req_task_queue_fail(req, err); + return; + } + + if (req->flags & REQ_F_FORCE_ASYNC) { +- const struct io_op_def *def = &io_op_defs[req->opcode]; + bool opcode_poll = def->pollin || def->pollout; + + if (opcode_poll && file_can_poll(req->file)) { +@@ -7249,6 +7273,8 @@ static int io_init_req(struct io_ring_ct + if (io_op_defs[opcode].needs_file) { + struct io_submit_state *state = &ctx->submit_state; + ++ req->work.fd = READ_ONCE(sqe->fd); ++ + /* + * Plug now if we have more than 2 IO left after this, and the + * target is potentially a read/write to block based storage. 
+@@ -7258,13 +7284,6 @@ static int io_init_req(struct io_ring_ct + state->need_plug = false; + blk_start_plug_nr_ios(&state->plug, state->submit_nr); + } +- +- if (req->flags & REQ_F_FIXED_FILE) +- req->file = io_file_get_fixed(req, READ_ONCE(sqe->fd), 0); +- else +- req->file = io_file_get_normal(req, READ_ONCE(sqe->fd)); +- if (unlikely(!req->file)) +- return -EBADF; + } + + personality = READ_ONCE(sqe->personality); diff --git a/queue-5.17/io_uring-drop-the-old-style-inflight-file-tracking.patch b/queue-5.17/io_uring-drop-the-old-style-inflight-file-tracking.patch new file mode 100644 index 00000000000..dc59c07bcd1 --- /dev/null +++ b/queue-5.17/io_uring-drop-the-old-style-inflight-file-tracking.patch @@ -0,0 +1,211 @@ +From foo@baz Tue Apr 12 07:11:16 AM CEST 2022 +From: Jens Axboe +Date: Thu, 31 Mar 2022 12:38:46 -0600 +Subject: io_uring: drop the old style inflight file tracking + +From: Jens Axboe + +commit d5361233e9ab920e135819f73dd8466355f1fddd upstream. + +io_uring tracks requests that are referencing an io_uring descriptor to +be able to cancel without worrying about loops in the references. Since +we now assign the file at execution time, the easier approach is to drop +a potentially problematic reference before we punt the request. This +eliminates the need to special case these types of files beyond just +marking them as such, and simplifies cancelation quite a bit. + +This also fixes a recent issue where an async punted tee operation would +with the io_uring descriptor as the output file would crash when +attempting to get a reference to the file from the io-wq worker. We +could have worked around that, but this is the much cleaner fix. + +Fixes: 6bf9c47a3989 ("io_uring: defer file assignment") +Reported-by: syzbot+c4b9303500a21750b250@syzkaller.appspotmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/io_uring.c | 85 ++++++++++++++++++---------------------------------------- + 1 file changed, 27 insertions(+), 58 deletions(-) + +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -112,8 +112,7 @@ + IOSQE_IO_DRAIN | IOSQE_CQE_SKIP_SUCCESS) + + #define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \ +- REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS | \ +- REQ_F_ASYNC_DATA) ++ REQ_F_POLLED | REQ_F_CREDS | REQ_F_ASYNC_DATA) + + #define IO_TCTX_REFS_CACHE_NR (1U << 10) + +@@ -469,7 +468,6 @@ struct io_uring_task { + const struct io_ring_ctx *last; + struct io_wq *io_wq; + struct percpu_counter inflight; +- atomic_t inflight_tracked; + atomic_t in_idle; + + spinlock_t task_lock; +@@ -1131,6 +1129,8 @@ static void io_clean_op(struct io_kiocb + static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd, + unsigned issue_flags); + static inline struct file *io_file_get_normal(struct io_kiocb *req, int fd); ++static void io_drop_inflight_file(struct io_kiocb *req); ++static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags); + static void __io_queue_sqe(struct io_kiocb *req); + static void io_rsrc_put_work(struct work_struct *work); + +@@ -1312,29 +1312,9 @@ static bool io_match_task(struct io_kioc + bool cancel_all) + __must_hold(&req->ctx->timeout_lock) + { +- struct io_kiocb *req; +- + if (task && head->task != task) + return false; +- if (cancel_all) +- return true; +- +- io_for_each_link(req, head) { +- if (req->flags & REQ_F_INFLIGHT) +- return true; +- } +- return false; +-} +- +-static bool io_match_linked(struct io_kiocb *head) +-{ +- struct io_kiocb *req; +- +- io_for_each_link(req, head) { +- if 
(req->flags & REQ_F_INFLIGHT) +- return true; +- } +- return false; ++ return cancel_all; + } + + /* +@@ -1344,24 +1324,9 @@ static bool io_match_linked(struct io_ki + static bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task, + bool cancel_all) + { +- bool matched; +- + if (task && head->task != task) + return false; +- if (cancel_all) +- return true; +- +- if (head->flags & REQ_F_LINK_TIMEOUT) { +- struct io_ring_ctx *ctx = head->ctx; +- +- /* protect against races with linked timeouts */ +- spin_lock_irq(&ctx->timeout_lock); +- matched = io_match_linked(head); +- spin_unlock_irq(&ctx->timeout_lock); +- } else { +- matched = io_match_linked(head); +- } +- return matched; ++ return cancel_all; + } + + static inline bool req_has_async_data(struct io_kiocb *req) +@@ -1509,14 +1474,6 @@ static inline bool io_req_ffs_set(struct + return req->flags & REQ_F_FIXED_FILE; + } + +-static inline void io_req_track_inflight(struct io_kiocb *req) +-{ +- if (!(req->flags & REQ_F_INFLIGHT)) { +- req->flags |= REQ_F_INFLIGHT; +- atomic_inc(¤t->io_uring->inflight_tracked); +- } +-} +- + static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req) + { + if (WARN_ON_ONCE(!req->link)) +@@ -2380,6 +2337,8 @@ static void io_req_task_work_add(struct + + WARN_ON_ONCE(!tctx); + ++ io_drop_inflight_file(req); ++ + spin_lock_irqsave(&tctx->task_lock, flags); + if (priority) + wq_list_add_tail(&req->io_task_work.node, &tctx->prior_task_list); +@@ -5548,7 +5507,10 @@ static int io_poll_check_events(struct i + if (!req->result) { + struct poll_table_struct pt = { ._key = poll->events }; + +- req->result = vfs_poll(req->file, &pt) & poll->events; ++ if (unlikely(!io_assign_file(req, IO_URING_F_UNLOCKED))) ++ req->result = -EBADF; ++ else ++ req->result = vfs_poll(req->file, &pt) & poll->events; + } + + /* multishot, just fill an CQE and proceed */ +@@ -6731,11 +6693,6 @@ static void io_clean_op(struct io_kiocb + kfree(req->apoll); + req->apoll = NULL; + } +- if (req->flags & REQ_F_INFLIGHT) { +- struct io_uring_task *tctx = req->task->io_uring; +- +- atomic_dec(&tctx->inflight_tracked); +- } + if (req->flags & REQ_F_CREDS) + put_cred(req->creds); + if (req->flags & REQ_F_ASYNC_DATA) { +@@ -7024,6 +6981,19 @@ out: + return file; + } + ++/* ++ * Drop the file for requeue operations. Only used of req->file is the ++ * io_uring descriptor itself. 
++ */ ++static void io_drop_inflight_file(struct io_kiocb *req) ++{ ++ if (unlikely(req->flags & REQ_F_INFLIGHT)) { ++ fput(req->file); ++ req->file = NULL; ++ req->flags &= ~REQ_F_INFLIGHT; ++ } ++} ++ + static struct file *io_file_get_normal(struct io_kiocb *req, int fd) + { + struct file *file = fget(fd); +@@ -7031,8 +7001,8 @@ static struct file *io_file_get_normal(s + trace_io_uring_file_get(req->ctx, fd); + + /* we don't allow fixed io_uring files */ +- if (file && unlikely(file->f_op == &io_uring_fops)) +- io_req_track_inflight(req); ++ if (file && file->f_op == &io_uring_fops) ++ req->flags |= REQ_F_INFLIGHT; + return file; + } + +@@ -8804,7 +8774,6 @@ static __cold int io_uring_alloc_task_co + xa_init(&tctx->xa); + init_waitqueue_head(&tctx->wait); + atomic_set(&tctx->in_idle, 0); +- atomic_set(&tctx->inflight_tracked, 0); + task->io_uring = tctx; + spin_lock_init(&tctx->task_lock); + INIT_WQ_LIST(&tctx->task_list); +@@ -9942,7 +9911,7 @@ static __cold void io_uring_clean_tctx(s + static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked) + { + if (tracked) +- return atomic_read(&tctx->inflight_tracked); ++ return 0; + return percpu_counter_sum(&tctx->inflight); + } + diff --git a/queue-5.17/io_uring-move-read-write-file-prep-state-into-actual-opcode-handler.patch b/queue-5.17/io_uring-move-read-write-file-prep-state-into-actual-opcode-handler.patch new file mode 100644 index 00000000000..f0637a4838f --- /dev/null +++ b/queue-5.17/io_uring-move-read-write-file-prep-state-into-actual-opcode-handler.patch @@ -0,0 +1,212 @@ +From foo@baz Tue Apr 12 07:11:16 AM CEST 2022 +From: Jens Axboe +Date: Tue, 29 Mar 2022 10:48:05 -0600 +Subject: io_uring: move read/write file prep state into actual opcode handler + +From: Jens Axboe + +commit 584b0180f0f4d67d7145950fe68c625f06c88b10 upstream. + +In preparation for not necessarily having a file assigned at prep time, +defer any initialization associated with the file to when the opcode +handler is run. + +Cc: stable@vger.kernel.org # v5.15+ +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/io_uring.c | 119 ++++++++++++++++++++++++++++++---------------------------- + 1 file changed, 62 insertions(+), 57 deletions(-) + +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -560,7 +560,8 @@ struct io_rw { + /* NOTE: kiocb has the file as the first member, so don't do it here */ + struct kiocb kiocb; + u64 addr; +- u64 len; ++ u32 len; ++ u32 flags; + }; + + struct io_connect { +@@ -2984,50 +2985,11 @@ static inline bool io_file_supports_nowa + + static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) + { +- struct io_ring_ctx *ctx = req->ctx; + struct kiocb *kiocb = &req->rw.kiocb; +- struct file *file = req->file; + unsigned ioprio; + int ret; + +- if (!io_req_ffs_set(req)) +- req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT; +- + kiocb->ki_pos = READ_ONCE(sqe->off); +- if (kiocb->ki_pos == -1) { +- if (!(file->f_mode & FMODE_STREAM)) { +- req->flags |= REQ_F_CUR_POS; +- kiocb->ki_pos = file->f_pos; +- } else { +- kiocb->ki_pos = 0; +- } +- } +- kiocb->ki_flags = iocb_flags(file); +- ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); +- if (unlikely(ret)) +- return ret; +- +- /* +- * If the file is marked O_NONBLOCK, still allow retry for it if it +- * supports async. Otherwise it's impossible to use O_NONBLOCK files +- * reliably. If not, or it IOCB_NOWAIT is set, don't retry. 
+- */ +- if ((kiocb->ki_flags & IOCB_NOWAIT) || +- ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req))) +- req->flags |= REQ_F_NOWAIT; +- +- if (ctx->flags & IORING_SETUP_IOPOLL) { +- if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll) +- return -EOPNOTSUPP; +- +- kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE; +- kiocb->ki_complete = io_complete_rw_iopoll; +- req->iopoll_completed = 0; +- } else { +- if (kiocb->ki_flags & IOCB_HIPRI) +- return -EINVAL; +- kiocb->ki_complete = io_complete_rw; +- } + + ioprio = READ_ONCE(sqe->ioprio); + if (ioprio) { +@@ -3043,6 +3005,7 @@ static int io_prep_rw(struct io_kiocb *r + req->imu = NULL; + req->rw.addr = READ_ONCE(sqe->addr); + req->rw.len = READ_ONCE(sqe->len); ++ req->rw.flags = READ_ONCE(sqe->rw_flags); + req->buf_index = READ_ONCE(sqe->buf_index); + return 0; + } +@@ -3523,13 +3486,6 @@ static inline int io_rw_prep_async(struc + return 0; + } + +-static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +-{ +- if (unlikely(!(req->file->f_mode & FMODE_READ))) +- return -EBADF; +- return io_prep_rw(req, sqe); +-} +- + /* + * This is our waitqueue callback handler, registered through __folio_lock_async() + * when we initially tried to do the IO with the iocb armed our waitqueue. +@@ -3617,6 +3573,58 @@ static bool need_read_all(struct io_kioc + S_ISBLK(file_inode(req->file)->i_mode); + } + ++static int io_rw_init_file(struct io_kiocb *req, fmode_t mode) ++{ ++ struct kiocb *kiocb = &req->rw.kiocb; ++ struct io_ring_ctx *ctx = req->ctx; ++ struct file *file = req->file; ++ int ret; ++ ++ if (unlikely(!file || !(file->f_mode & mode))) ++ return -EBADF; ++ ++ if (!io_req_ffs_set(req)) ++ req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT; ++ ++ if (kiocb->ki_pos == -1) { ++ if (!(file->f_mode & FMODE_STREAM)) { ++ req->flags |= REQ_F_CUR_POS; ++ kiocb->ki_pos = file->f_pos; ++ } else { ++ kiocb->ki_pos = 0; ++ } ++ } ++ ++ kiocb->ki_flags = iocb_flags(file); ++ ret = kiocb_set_rw_flags(kiocb, req->rw.flags); ++ if (unlikely(ret)) ++ return ret; ++ ++ /* ++ * If the file is marked O_NONBLOCK, still allow retry for it if it ++ * supports async. Otherwise it's impossible to use O_NONBLOCK files ++ * reliably. If not, or it IOCB_NOWAIT is set, don't retry. 
++ */ ++ if ((kiocb->ki_flags & IOCB_NOWAIT) || ++ ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req))) ++ req->flags |= REQ_F_NOWAIT; ++ ++ if (ctx->flags & IORING_SETUP_IOPOLL) { ++ if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll) ++ return -EOPNOTSUPP; ++ ++ kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE; ++ kiocb->ki_complete = io_complete_rw_iopoll; ++ req->iopoll_completed = 0; ++ } else { ++ if (kiocb->ki_flags & IOCB_HIPRI) ++ return -EINVAL; ++ kiocb->ki_complete = io_complete_rw; ++ } ++ ++ return 0; ++} ++ + static int io_read(struct io_kiocb *req, unsigned int issue_flags) + { + struct io_rw_state __s, *s = &__s; +@@ -3641,6 +3649,9 @@ static int io_read(struct io_kiocb *req, + iov_iter_restore(&s->iter, &s->iter_state); + iovec = NULL; + } ++ ret = io_rw_init_file(req, FMODE_READ); ++ if (unlikely(ret)) ++ return ret; + req->result = iov_iter_count(&s->iter); + + if (force_nonblock) { +@@ -3739,14 +3750,6 @@ out_free: + return 0; + } + +-static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +-{ +- if (unlikely(!(req->file->f_mode & FMODE_WRITE))) +- return -EBADF; +- req->rw.kiocb.ki_hint = ki_hint_validate(file_write_hint(req->file)); +- return io_prep_rw(req, sqe); +-} +- + static int io_write(struct io_kiocb *req, unsigned int issue_flags) + { + struct io_rw_state __s, *s = &__s; +@@ -3766,6 +3769,9 @@ static int io_write(struct io_kiocb *req + iov_iter_restore(&s->iter, &s->iter_state); + iovec = NULL; + } ++ ret = io_rw_init_file(req, FMODE_WRITE); ++ if (unlikely(ret)) ++ return ret; + req->result = iov_iter_count(&s->iter); + + if (force_nonblock) { +@@ -6501,11 +6507,10 @@ static int io_req_prep(struct io_kiocb * + case IORING_OP_READV: + case IORING_OP_READ_FIXED: + case IORING_OP_READ: +- return io_read_prep(req, sqe); + case IORING_OP_WRITEV: + case IORING_OP_WRITE_FIXED: + case IORING_OP_WRITE: +- return io_write_prep(req, sqe); ++ return io_prep_rw(req, sqe); + case IORING_OP_POLL_ADD: + return io_poll_add_prep(req, sqe); + case IORING_OP_POLL_REMOVE: diff --git a/queue-5.17/io_uring-propagate-issue_flags-state-down-to-file-assignment.patch b/queue-5.17/io_uring-propagate-issue_flags-state-down-to-file-assignment.patch new file mode 100644 index 00000000000..2826298dc00 --- /dev/null +++ b/queue-5.17/io_uring-propagate-issue_flags-state-down-to-file-assignment.patch @@ -0,0 +1,213 @@ +From foo@baz Tue Apr 12 07:11:16 AM CEST 2022 +From: Jens Axboe +Date: Mon, 4 Apr 2022 17:18:43 -0600 +Subject: io_uring: propagate issue_flags state down to file assignment + +From: Jens Axboe + +commit 5106dd6e74ab6c94daac1c357094f11e6934b36f upstream. + +We'll need this in a future patch, when we could be assigning the file +after the prep stage. While at it, get rid of the io_file_get() helper, +it just makes the code harder to read. 
+ +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + fs/io_uring.c | 82 +++++++++++++++++++++++++++++++++------------------------- + 1 file changed, 47 insertions(+), 35 deletions(-) + +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -1128,8 +1128,9 @@ static int __io_register_rsrc_update(str + struct io_uring_rsrc_update2 *up, + unsigned nr_args); + static void io_clean_op(struct io_kiocb *req); +-static struct file *io_file_get(struct io_ring_ctx *ctx, +- struct io_kiocb *req, int fd, bool fixed); ++static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd, ++ unsigned issue_flags); ++static inline struct file *io_file_get_normal(struct io_kiocb *req, int fd); + static void __io_queue_sqe(struct io_kiocb *req); + static void io_rsrc_put_work(struct work_struct *work); + +@@ -1258,13 +1259,20 @@ static void io_rsrc_refs_refill(struct i + } + + static inline void io_req_set_rsrc_node(struct io_kiocb *req, +- struct io_ring_ctx *ctx) ++ struct io_ring_ctx *ctx, ++ unsigned int issue_flags) + { + if (!req->fixed_rsrc_refs) { + req->fixed_rsrc_refs = &ctx->rsrc_node->refs; +- ctx->rsrc_cached_refs--; +- if (unlikely(ctx->rsrc_cached_refs < 0)) +- io_rsrc_refs_refill(ctx); ++ ++ if (!(issue_flags & IO_URING_F_UNLOCKED)) { ++ lockdep_assert_held(&ctx->uring_lock); ++ ctx->rsrc_cached_refs--; ++ if (unlikely(ctx->rsrc_cached_refs < 0)) ++ io_rsrc_refs_refill(ctx); ++ } else { ++ percpu_ref_get(req->fixed_rsrc_refs); ++ } + } + } + +@@ -3122,7 +3130,8 @@ static int __io_import_fixed(struct io_k + return 0; + } + +-static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter) ++static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter, ++ unsigned int issue_flags) + { + struct io_mapped_ubuf *imu = req->imu; + u16 index, buf_index = req->buf_index; +@@ -3132,7 +3141,7 @@ static int io_import_fixed(struct io_kio + + if (unlikely(buf_index >= ctx->nr_user_bufs)) + return -EFAULT; +- io_req_set_rsrc_node(req, ctx); ++ io_req_set_rsrc_node(req, ctx, issue_flags); + index = array_index_nospec(buf_index, ctx->nr_user_bufs); + imu = READ_ONCE(ctx->user_bufs[index]); + req->imu = imu; +@@ -3288,7 +3297,7 @@ static struct iovec *__io_import_iovec(i + ssize_t ret; + + if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) { +- ret = io_import_fixed(req, rw, iter); ++ ret = io_import_fixed(req, rw, iter, issue_flags); + if (ret) + return ERR_PTR(ret); + return NULL; +@@ -4167,8 +4176,10 @@ static int io_tee(struct io_kiocb *req, + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + +- in = io_file_get(req->ctx, req, sp->splice_fd_in, +- (sp->flags & SPLICE_F_FD_IN_FIXED)); ++ if (sp->flags & SPLICE_F_FD_IN_FIXED) ++ in = io_file_get_fixed(req, sp->splice_fd_in, IO_URING_F_UNLOCKED); ++ else ++ in = io_file_get_normal(req, sp->splice_fd_in); + if (!in) { + ret = -EBADF; + goto done; +@@ -4207,8 +4218,10 @@ static int io_splice(struct io_kiocb *re + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + +- in = io_file_get(req->ctx, req, sp->splice_fd_in, +- (sp->flags & SPLICE_F_FD_IN_FIXED)); ++ if (sp->flags & SPLICE_F_FD_IN_FIXED) ++ in = io_file_get_fixed(req, sp->splice_fd_in, IO_URING_F_UNLOCKED); ++ else ++ in = io_file_get_normal(req, sp->splice_fd_in); + if (!in) { + ret = -EBADF; + goto done; +@@ -5513,7 +5526,7 @@ static void io_poll_remove_entries(struc + * either spurious wakeup or multishot CQE is served. 0 when it's done with + * the request, then the mask is stored in req->result. 
+ */ +-static int io_poll_check_events(struct io_kiocb *req) ++static int io_poll_check_events(struct io_kiocb *req, bool locked) + { + struct io_ring_ctx *ctx = req->ctx; + struct io_poll_iocb *poll = io_poll_get_single(req); +@@ -5569,7 +5582,7 @@ static void io_poll_task_func(struct io_ + struct io_ring_ctx *ctx = req->ctx; + int ret; + +- ret = io_poll_check_events(req); ++ ret = io_poll_check_events(req, *locked); + if (ret > 0) + return; + +@@ -5594,7 +5607,7 @@ static void io_apoll_task_func(struct io + struct io_ring_ctx *ctx = req->ctx; + int ret; + +- ret = io_poll_check_events(req); ++ ret = io_poll_check_events(req, *locked); + if (ret > 0) + return; + +@@ -6962,30 +6975,36 @@ static void io_fixed_file_set(struct io_ + file_slot->file_ptr = file_ptr; + } + +-static inline struct file *io_file_get_fixed(struct io_ring_ctx *ctx, +- struct io_kiocb *req, int fd) ++static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd, ++ unsigned int issue_flags) + { +- struct file *file; ++ struct io_ring_ctx *ctx = req->ctx; ++ struct file *file = NULL; + unsigned long file_ptr; + ++ if (issue_flags & IO_URING_F_UNLOCKED) ++ mutex_lock(&ctx->uring_lock); ++ + if (unlikely((unsigned int)fd >= ctx->nr_user_files)) +- return NULL; ++ goto out; + fd = array_index_nospec(fd, ctx->nr_user_files); + file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr; + file = (struct file *) (file_ptr & FFS_MASK); + file_ptr &= ~FFS_MASK; + /* mask in overlapping REQ_F and FFS bits */ + req->flags |= (file_ptr << REQ_F_SUPPORT_NOWAIT_BIT); +- io_req_set_rsrc_node(req, ctx); ++ io_req_set_rsrc_node(req, ctx, 0); ++out: ++ if (issue_flags & IO_URING_F_UNLOCKED) ++ mutex_unlock(&ctx->uring_lock); + return file; + } + +-static struct file *io_file_get_normal(struct io_ring_ctx *ctx, +- struct io_kiocb *req, int fd) ++static struct file *io_file_get_normal(struct io_kiocb *req, int fd) + { + struct file *file = fget(fd); + +- trace_io_uring_file_get(ctx, fd); ++ trace_io_uring_file_get(req->ctx, fd); + + /* we don't allow fixed io_uring files */ + if (file && unlikely(file->f_op == &io_uring_fops)) +@@ -6993,15 +7012,6 @@ static struct file *io_file_get_normal(s + return file; + } + +-static inline struct file *io_file_get(struct io_ring_ctx *ctx, +- struct io_kiocb *req, int fd, bool fixed) +-{ +- if (fixed) +- return io_file_get_fixed(ctx, req, fd); +- else +- return io_file_get_normal(ctx, req, fd); +-} +- + static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked) + { + struct io_kiocb *prev = req->timeout.prev; +@@ -7249,8 +7259,10 @@ static int io_init_req(struct io_ring_ct + blk_start_plug_nr_ios(&state->plug, state->submit_nr); + } + +- req->file = io_file_get(ctx, req, READ_ONCE(sqe->fd), +- (sqe_flags & IOSQE_FIXED_FILE)); ++ if (req->flags & REQ_F_FIXED_FILE) ++ req->file = io_file_get_fixed(req, READ_ONCE(sqe->fd), 0); ++ else ++ req->file = io_file_get_normal(req, READ_ONCE(sqe->fd)); + if (unlikely(!req->file)) + return -EBADF; + } diff --git a/queue-5.17/series b/queue-5.17/series index 207201a069d..f19415765cd 100644 --- a/queue-5.17/series +++ b/queue-5.17/series @@ -337,3 +337,7 @@ powerpc-64-fix-build-failure-with-allyesconfig-in-book3s_64_entry.s.patch irqchip-gic-gic-v3-prevent-gsi-to-sgi-translations.patch mm-sparsemem-fix-mem_section-will-never-be-null-gcc-12-warning.patch static_call-don-t-make-__static_call_return0-static.patch +io_uring-move-read-write-file-prep-state-into-actual-opcode-handler.patch 
+io_uring-propagate-issue_flags-state-down-to-file-assignment.patch
+io_uring-defer-file-assignment.patch
+io_uring-drop-the-old-style-inflight-file-tracking.patch