From: Jens Axboe Date: Sat, 14 Mar 2026 14:41:04 +0000 (-0600) Subject: io_uring: switch struct io_ring_ctx internal bitfields to flags X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=f1a424e21c15993db0f9594cda17ef5d516ab3e9;p=thirdparty%2Fkernel%2Flinux.git io_uring: switch struct io_ring_ctx internal bitfields to flags Bitfields cannot be set and checked atomically, and this makes it more clear that these are indeed in shared storage and must be checked and set in a sane fashion. This is in preparation for annotating a few of the known racy, but harmless, flags checking. No intended functional changes in this patch. Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Jens Axboe --- diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index dd1420bfcb73..0b3f08adc217 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -268,24 +268,30 @@ struct io_alloc_cache { unsigned int init_clear; }; +enum { + IO_RING_F_DRAIN_NEXT = BIT(0), + IO_RING_F_OP_RESTRICTED = BIT(1), + IO_RING_F_REG_RESTRICTED = BIT(2), + IO_RING_F_OFF_TIMEOUT_USED = BIT(3), + IO_RING_F_DRAIN_ACTIVE = BIT(4), + IO_RING_F_HAS_EVFD = BIT(5), + /* all CQEs should be posted only by the submitter task */ + IO_RING_F_TASK_COMPLETE = BIT(6), + IO_RING_F_LOCKLESS_CQ = BIT(7), + IO_RING_F_SYSCALL_IOPOLL = BIT(8), + IO_RING_F_POLL_ACTIVATED = BIT(9), + IO_RING_F_DRAIN_DISABLED = BIT(10), + IO_RING_F_COMPAT = BIT(11), + IO_RING_F_IOWQ_LIMITS_SET = BIT(12), +}; + struct io_ring_ctx { /* const or read-mostly hot data */ struct { + /* ring setup flags */ unsigned int flags; - unsigned int drain_next: 1; - unsigned int op_restricted: 1; - unsigned int reg_restricted: 1; - unsigned int off_timeout_used: 1; - unsigned int drain_active: 1; - unsigned int has_evfd: 1; - /* all CQEs should be posted only by the submitter task */ - unsigned int task_complete: 1; - unsigned int lockless_cq: 1; - unsigned int syscall_iopoll: 1; - unsigned int 
poll_activated: 1; - unsigned int drain_disabled: 1; - unsigned int compat: 1; - unsigned int iowq_limits_set : 1; + /* internal state flags (IO_RING_F_*), mostly read-only */ + unsigned int int_flags; struct task_struct *submitter_task; struct io_rings *rings; diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c index 7482a7dc6b38..3da028500f76 100644 --- a/io_uring/eventfd.c +++ b/io_uring/eventfd.c @@ -148,7 +148,7 @@ int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg, spin_unlock(&ctx->completion_lock); ev_fd->eventfd_async = eventfd_async; - ctx->has_evfd = true; + ctx->int_flags |= IO_RING_F_HAS_EVFD; refcount_set(&ev_fd->refs, 1); atomic_set(&ev_fd->ops, 0); rcu_assign_pointer(ctx->io_ev_fd, ev_fd); @@ -162,7 +162,7 @@ int io_eventfd_unregister(struct io_ring_ctx *ctx) ev_fd = rcu_dereference_protected(ctx->io_ev_fd, lockdep_is_held(&ctx->uring_lock)); if (ev_fd) { - ctx->has_evfd = false; + ctx->int_flags &= ~IO_RING_F_HAS_EVFD; rcu_assign_pointer(ctx->io_ev_fd, NULL); io_eventfd_put(ev_fd); return 0; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 9a37035e76c0..bfeb3bc3849d 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -477,17 +477,17 @@ static __cold noinline void io_queue_deferred(struct io_ring_ctx *ctx) void __io_commit_cqring_flush(struct io_ring_ctx *ctx) { - if (ctx->poll_activated) + if (ctx->int_flags & IO_RING_F_POLL_ACTIVATED) io_poll_wq_wake(ctx); - if (ctx->off_timeout_used) + if (ctx->int_flags & IO_RING_F_OFF_TIMEOUT_USED) io_flush_timeouts(ctx); - if (ctx->has_evfd) + if (ctx->int_flags & IO_RING_F_HAS_EVFD) io_eventfd_signal(ctx, true); } static inline void __io_cq_lock(struct io_ring_ctx *ctx) { - if (!ctx->lockless_cq) + if (!(ctx->int_flags & IO_RING_F_LOCKLESS_CQ)) spin_lock(&ctx->completion_lock); } @@ -500,11 +500,11 @@ static inline void io_cq_lock(struct io_ring_ctx *ctx) static inline void __io_cq_unlock_post(struct io_ring_ctx *ctx) { io_commit_cqring(ctx); - if 
(!ctx->task_complete) { - if (!ctx->lockless_cq) + if (!(ctx->int_flags & IO_RING_F_TASK_COMPLETE)) { + if (!(ctx->int_flags & IO_RING_F_LOCKLESS_CQ)) spin_unlock(&ctx->completion_lock); /* IOPOLL rings only need to wake up if it's also SQPOLL */ - if (!ctx->syscall_iopoll) + if (!(ctx->int_flags & IO_RING_F_SYSCALL_IOPOLL)) io_cqring_wake(ctx); } io_commit_cqring_flush(ctx); @@ -830,7 +830,7 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags) { lockdep_assert_held(&ctx->uring_lock); - lockdep_assert(ctx->lockless_cq); + lockdep_assert(ctx->int_flags & IO_RING_F_LOCKLESS_CQ); if (!io_fill_cqe_aux(ctx, user_data, res, cflags)) { struct io_cqe cqe = io_init_cqe(user_data, res, cflags); @@ -860,7 +860,7 @@ bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags) lockdep_assert(!io_wq_current_is_worker()); lockdep_assert_held(&ctx->uring_lock); - if (!ctx->lockless_cq) { + if (!(ctx->int_flags & IO_RING_F_LOCKLESS_CQ)) { spin_lock(&ctx->completion_lock); posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags); spin_unlock(&ctx->completion_lock); @@ -885,7 +885,7 @@ bool io_req_post_cqe32(struct io_kiocb *req, struct io_uring_cqe cqe[2]) lockdep_assert_held(&ctx->uring_lock); cqe[0].user_data = req->cqe.user_data; - if (!ctx->lockless_cq) { + if (!(ctx->int_flags & IO_RING_F_LOCKLESS_CQ)) { spin_lock(&ctx->completion_lock); posted = io_fill_cqe_aux32(ctx, cqe); spin_unlock(&ctx->completion_lock); @@ -913,7 +913,7 @@ static void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags) * Handle special CQ sync cases via task_work. DEFER_TASKRUN requires * the submitter task context, IOPOLL protects with uring_lock. 
*/ - if (ctx->lockless_cq || (req->flags & REQ_F_REISSUE)) { + if ((ctx->int_flags & IO_RING_F_LOCKLESS_CQ) || (req->flags & REQ_F_REISSUE)) { defer_complete: req->io_task_work.func = io_req_task_complete; io_req_task_work_add(req); @@ -1135,7 +1135,7 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx) */ if (!(req->flags & (REQ_F_CQE_SKIP | REQ_F_REISSUE)) && unlikely(!io_fill_cqe_req(ctx, req))) { - if (ctx->lockless_cq) + if (ctx->int_flags & IO_RING_F_LOCKLESS_CQ) io_cqe_overflow(ctx, &req->cqe, &req->big_cqe); else io_cqe_overflow_locked(ctx, &req->cqe, &req->big_cqe); @@ -1148,7 +1148,7 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx) INIT_WQ_LIST(&state->compl_reqs); } - if (unlikely(ctx->drain_active)) + if (unlikely(ctx->int_flags & IO_RING_F_DRAIN_ACTIVE)) io_queue_deferred(ctx); ctx->submit_state.cq_flush = false; @@ -1344,7 +1344,7 @@ static __cold void io_drain_req(struct io_kiocb *req) list_add_tail(&de->list, &ctx->defer_list); io_queue_deferred(ctx); if (!drain && list_empty(&ctx->defer_list)) - ctx->drain_active = false; + ctx->int_flags &= ~IO_RING_F_DRAIN_ACTIVE; } static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def, @@ -1655,7 +1655,7 @@ static void io_queue_sqe_fallback(struct io_kiocb *req) } else { /* can't fail with IO_URING_F_INLINE */ io_req_sqe_copy(req, IO_URING_F_INLINE); - if (unlikely(req->ctx->drain_active)) + if (unlikely(req->ctx->int_flags & IO_RING_F_DRAIN_ACTIVE)) io_drain_req(req); else io_queue_iowq(req); @@ -1671,7 +1671,7 @@ static inline bool io_check_restriction(struct io_ring_ctx *ctx, struct io_kiocb *req, unsigned int sqe_flags) { - if (!ctx->op_restricted) + if (!(ctx->int_flags & IO_RING_F_OP_RESTRICTED)) return true; if (!test_bit(req->opcode, ctx->restrictions.sqe_op)) return false; @@ -1691,7 +1691,7 @@ static void io_init_drain(struct io_ring_ctx *ctx) { struct io_kiocb *head = ctx->submit_state.link.head; - ctx->drain_active = true; + ctx->int_flags |= 
IO_RING_F_DRAIN_ACTIVE; if (head) { /* * If we need to drain a request in the middle of a link, drain @@ -1701,7 +1701,7 @@ static void io_init_drain(struct io_ring_ctx *ctx) * link. */ head->flags |= REQ_F_IO_DRAIN | REQ_F_FORCE_ASYNC; - ctx->drain_next = true; + ctx->int_flags |= IO_RING_F_DRAIN_NEXT; } } @@ -1767,23 +1767,23 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, req->buf_index = READ_ONCE(sqe->buf_group); } if (sqe_flags & IOSQE_CQE_SKIP_SUCCESS) - ctx->drain_disabled = true; + ctx->int_flags |= IO_RING_F_DRAIN_DISABLED; if (sqe_flags & IOSQE_IO_DRAIN) { - if (ctx->drain_disabled) + if (ctx->int_flags & IO_RING_F_DRAIN_DISABLED) return io_init_fail_req(req, -EOPNOTSUPP); io_init_drain(ctx); } } - if (unlikely(ctx->op_restricted || ctx->drain_active || ctx->drain_next)) { + if (unlikely(ctx->int_flags & (IO_RING_F_OP_RESTRICTED | IO_RING_F_DRAIN_ACTIVE | IO_RING_F_DRAIN_NEXT))) { if (!io_check_restriction(ctx, req, sqe_flags)) return io_init_fail_req(req, -EACCES); /* knock it to the slow queue path, will be drained there */ - if (ctx->drain_active) + if (ctx->int_flags & IO_RING_F_DRAIN_ACTIVE) req->flags |= REQ_F_FORCE_ASYNC; /* if there is no link, we're at "next" request and need to drain */ - if (unlikely(ctx->drain_next) && !ctx->submit_state.link.head) { - ctx->drain_next = false; - ctx->drain_active = true; + if (unlikely(ctx->int_flags & IO_RING_F_DRAIN_NEXT) && !ctx->submit_state.link.head) { + ctx->int_flags &= ~IO_RING_F_DRAIN_NEXT; + ctx->int_flags |= IO_RING_F_DRAIN_ACTIVE; req->flags |= REQ_F_IO_DRAIN | REQ_F_FORCE_ASYNC; } } @@ -2204,7 +2204,7 @@ static __cold void io_activate_pollwq_cb(struct callback_head *cb) poll_wq_task_work); mutex_lock(&ctx->uring_lock); - ctx->poll_activated = true; + ctx->int_flags |= IO_RING_F_POLL_ACTIVATED; mutex_unlock(&ctx->uring_lock); /* @@ -2219,9 +2219,9 @@ __cold void io_activate_pollwq(struct io_ring_ctx *ctx) { spin_lock(&ctx->completion_lock); /* already activated or in 
progress */ - if (ctx->poll_activated || ctx->poll_wq_task_work.func) + if ((ctx->int_flags & IO_RING_F_POLL_ACTIVATED) || ctx->poll_wq_task_work.func) goto out; - if (WARN_ON_ONCE(!ctx->task_complete)) + if (WARN_ON_ONCE(!(ctx->int_flags & IO_RING_F_TASK_COMPLETE))) goto out; if (!ctx->submitter_task) goto out; @@ -2242,7 +2242,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait) struct io_ring_ctx *ctx = file->private_data; __poll_t mask = 0; - if (unlikely(!ctx->poll_activated)) + if (unlikely(!(ctx->int_flags & IO_RING_F_POLL_ACTIVATED))) io_activate_pollwq(ctx); /* * provides mb() which pairs with barrier from wq_has_sleeper @@ -2607,7 +2607,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, goto out; } if (flags & IORING_ENTER_GETEVENTS) { - if (ctx->syscall_iopoll) + if (ctx->int_flags & IO_RING_F_SYSCALL_IOPOLL) goto iopoll_locked; /* * Ignore errors, we'll soon call io_cqring_wait() and @@ -2622,7 +2622,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, if (flags & IORING_ENTER_GETEVENTS) { int ret2; - if (ctx->syscall_iopoll) { + if (ctx->int_flags & IO_RING_F_SYSCALL_IOPOLL) { /* * We disallow the app entering submit/complete with * polling, but we still need to lock the ring to @@ -2923,9 +2923,9 @@ static void io_ctx_restriction_clone(struct io_ring_ctx *ctx, if (dst->bpf_filters) WRITE_ONCE(ctx->bpf_filters, dst->bpf_filters->filters); if (dst->op_registered) - ctx->op_restricted = 1; + ctx->int_flags |= IO_RING_F_OP_RESTRICTED; if (dst->reg_registered) - ctx->reg_restricted = 1; + ctx->int_flags |= IO_RING_F_REG_RESTRICTED; } static __cold int io_uring_create(struct io_ctx_config *config) @@ -2952,17 +2952,18 @@ static __cold int io_uring_create(struct io_ctx_config *config) if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) && !(ctx->flags & IORING_SETUP_IOPOLL)) - ctx->task_complete = true; + ctx->int_flags |= IO_RING_F_TASK_COMPLETE; - if (ctx->task_complete || (ctx->flags & 
IORING_SETUP_IOPOLL)) - ctx->lockless_cq = true; + if ((ctx->int_flags & IO_RING_F_TASK_COMPLETE) || + (ctx->flags & IORING_SETUP_IOPOLL)) + ctx->int_flags |= IO_RING_F_LOCKLESS_CQ; /* * lazy poll_wq activation relies on ->task_complete for synchronisation * purposes, see io_activate_pollwq() */ - if (!ctx->task_complete) - ctx->poll_activated = true; + if (!(ctx->int_flags & IO_RING_F_TASK_COMPLETE)) + ctx->int_flags |= IO_RING_F_POLL_ACTIVATED; /* * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user @@ -2972,9 +2973,10 @@ static __cold int io_uring_create(struct io_ctx_config *config) */ if (ctx->flags & IORING_SETUP_IOPOLL && !(ctx->flags & IORING_SETUP_SQPOLL)) - ctx->syscall_iopoll = 1; + ctx->int_flags |= IO_RING_F_SYSCALL_IOPOLL; - ctx->compat = in_compat_syscall(); + if (in_compat_syscall()) + ctx->int_flags |= IO_RING_F_COMPAT; if (!ns_capable_noaudit(&init_user_ns, CAP_IPC_LOCK)) ctx->user = get_uid(current_user()); diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 0fa844faf287..5cb1983043cd 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -211,7 +211,7 @@ static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx) if (ctx->flags & IORING_SETUP_IOPOLL) { lockdep_assert_held(&ctx->uring_lock); - } else if (!ctx->task_complete) { + } else if (!(ctx->int_flags & IO_RING_F_TASK_COMPLETE)) { lockdep_assert_held(&ctx->completion_lock); } else if (ctx->submitter_task) { /* @@ -228,7 +228,7 @@ static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx) static inline bool io_is_compat(struct io_ring_ctx *ctx) { - return IS_ENABLED(CONFIG_COMPAT) && unlikely(ctx->compat); + return IS_ENABLED(CONFIG_COMPAT) && unlikely(ctx->int_flags & IO_RING_F_COMPAT); } static inline void io_submit_flush_completions(struct io_ring_ctx *ctx) @@ -472,8 +472,9 @@ static inline void io_req_complete_defer(struct io_kiocb *req) static inline void io_commit_cqring_flush(struct io_ring_ctx *ctx) { - if (unlikely(ctx->off_timeout_used 
|| - ctx->has_evfd || ctx->poll_activated)) + if (unlikely(ctx->int_flags & (IO_RING_F_OFF_TIMEOUT_USED | + IO_RING_F_HAS_EVFD | + IO_RING_F_POLL_ACTIVATED))) __io_commit_cqring_flush(ctx); } diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index 57ad0085869a..3ff9098573db 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -67,7 +67,7 @@ void io_msg_ring_cleanup(struct io_kiocb *req) static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx) { - return target_ctx->task_complete; + return target_ctx->int_flags & IO_RING_F_TASK_COMPLETE; } static void io_msg_tw_complete(struct io_tw_req tw_req, io_tw_token_t tw) diff --git a/io_uring/register.c b/io_uring/register.c index 0148735f7711..489a6feaf228 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -184,9 +184,9 @@ static __cold int io_register_restrictions(struct io_ring_ctx *ctx, return ret; } if (ctx->restrictions.op_registered) - ctx->op_restricted = 1; + ctx->int_flags |= IO_RING_F_OP_RESTRICTED; if (ctx->restrictions.reg_registered) - ctx->reg_restricted = 1; + ctx->int_flags |= IO_RING_F_REG_RESTRICTED; return 0; } @@ -384,7 +384,7 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, for (i = 0; i < ARRAY_SIZE(new_count); i++) if (new_count[i]) ctx->iowq_limits[i] = new_count[i]; - ctx->iowq_limits_set = true; + ctx->int_flags |= IO_RING_F_IOWQ_LIMITS_SET; if (tctx && tctx->io_wq) { ret = io_wq_max_workers(tctx->io_wq, new_count); @@ -725,7 +725,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, if (ctx->submitter_task && ctx->submitter_task != current) return -EEXIST; - if (ctx->reg_restricted && !(ctx->flags & IORING_SETUP_R_DISABLED)) { + if ((ctx->int_flags & IO_RING_F_REG_RESTRICTED) && !(ctx->flags & IORING_SETUP_R_DISABLED)) { opcode = array_index_nospec(opcode, IORING_REGISTER_LAST); if (!test_bit(opcode, ctx->restrictions.register_op)) return -EACCES; diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 
4fa59bf89bba..52554ed89b11 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -295,7 +295,7 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx, u64 tag = 0; uvec = u64_to_user_ptr(user_data); - iov = iovec_from_user(uvec, 1, 1, &fast_iov, ctx->compat); + iov = iovec_from_user(uvec, 1, 1, &fast_iov, io_is_compat(ctx)); if (IS_ERR(iov)) { err = PTR_ERR(iov); break; @@ -319,7 +319,7 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx, i = array_index_nospec(up->offset + done, ctx->buf_table.nr); io_reset_rsrc_node(ctx, &ctx->buf_table, i); ctx->buf_table.nodes[i] = node; - if (ctx->compat) + if (io_is_compat(ctx)) user_data += sizeof(struct compat_iovec); else user_data += sizeof(struct iovec); @@ -883,12 +883,12 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg, if (arg) { uvec = (struct iovec __user *) arg; - iov = iovec_from_user(uvec, 1, 1, &fast_iov, ctx->compat); + iov = iovec_from_user(uvec, 1, 1, &fast_iov, io_is_compat(ctx)); if (IS_ERR(iov)) { ret = PTR_ERR(iov); break; } - if (ctx->compat) + if (io_is_compat(ctx)) arg += sizeof(struct compat_iovec); else arg += sizeof(struct iovec); diff --git a/io_uring/tctx.c b/io_uring/tctx.c index 7cbcb82aedfb..143de8e990eb 100644 --- a/io_uring/tctx.c +++ b/io_uring/tctx.c @@ -121,7 +121,7 @@ int __io_uring_add_tctx_node(struct io_ring_ctx *ctx) return ret; tctx = current->io_uring; - if (ctx->iowq_limits_set) { + if (ctx->int_flags & IO_RING_F_IOWQ_LIMITS_SET) { unsigned int limits[2] = { ctx->iowq_limits[0], ctx->iowq_limits[1], }; diff --git a/io_uring/timeout.c b/io_uring/timeout.c index 8eddf8add7a2..579fdddac71a 100644 --- a/io_uring/timeout.c +++ b/io_uring/timeout.c @@ -566,8 +566,8 @@ static int __io_timeout_prep(struct io_kiocb *req, INIT_LIST_HEAD(&timeout->list); timeout->off = off; - if (unlikely(off && !req->ctx->off_timeout_used)) - req->ctx->off_timeout_used = true; + if (unlikely(off && !(req->ctx->int_flags & IO_RING_F_OFF_TIMEOUT_USED))) + 
req->ctx->int_flags |= IO_RING_F_OFF_TIMEOUT_USED; /* * for multishot reqs w/ fixed nr of repeats, repeats tracks the * remaining nr diff --git a/io_uring/tw.c b/io_uring/tw.c index 2f2b4ac4b126..022fe8753c19 100644 --- a/io_uring/tw.c +++ b/io_uring/tw.c @@ -222,7 +222,7 @@ void io_req_local_work_add(struct io_kiocb *req, unsigned flags) if (!head) { io_ctx_mark_taskrun(ctx); - if (ctx->has_evfd) + if (ctx->int_flags & IO_RING_F_HAS_EVFD) io_eventfd_signal(ctx, false); }