From: Jens Axboe
Date: Thu, 11 Jun 2026 17:44:47 +0000 (-0600)
Subject: io_uring: remove the per-ctx fallback task_work machinery
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=576cce91480a949f5b83578300f37023b933e0a2;p=thirdparty%2Fkernel%2Flinux.git

io_uring: remove the per-ctx fallback task_work machinery

With the tctx fallback running its entries directly, the per-ctx
fallback work has a single user left: moving local (DEFER_TASKRUN)
task_work entries out of a ring that is going away. Both of its call
sites are process context and don't hold ->uring_lock, the same
conditions the deferred fallback work itself ran under - so run the
entries in cancel mode right there instead, and rename the helper to
io_cancel_local_task_work() to match what it now does.

With that, ->fallback_llist, ->fallback_work, io_fallback_req_func()
and __io_fallback_tw() can all go away, along with the fallback work
flushing in the ring exit and cancel paths. Requests that get orphaned
by an exiting task now run via the tctx fallback work, which the ring
exit side implicitly waits on through the ctx refs those requests hold.

Signed-off-by: Jens Axboe --- diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index f511e96865b6c..6415a3353ee0e 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -498,8 +498,6 @@ struct io_ring_ctx { struct mutex tctx_lock; /* ctx exit and cancelation */ - struct llist_head fallback_llist; - struct delayed_work fallback_work; struct work_struct exit_work; struct completion ref_comp; diff --git a/io_uring/cancel.c b/io_uring/cancel.c index 5e5eb9cfc7cd6..b0259e74f6784 100644 --- a/io_uring/cancel.c +++ b/io_uring/cancel.c @@ -565,8 +565,6 @@ __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx, ret |= io_kill_timeouts(ctx, tctx, cancel_all); if (tctx) ret |= io_run_task_work() > 0; - else - ret |= flush_delayed_work(&ctx->fallback_work); return ret; } diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 0809fc70c91d6..5e687bbb973e4 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -289,7 +289,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) #ifdef CONFIG_FUTEX INIT_HLIST_HEAD(&ctx->futex_list); #endif - INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func); INIT_WQ_LIST(&ctx->submit_state.compl_reqs); INIT_HLIST_HEAD(&ctx->cancelable_uring_cmd); io_napi_init(ctx); @@ -1192,7 +1191,7 @@ __cold void io_iopoll_try_reap_events(struct io_ring_ctx *ctx) mutex_unlock(&ctx->uring_lock); if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) - io_move_task_work_from_local(ctx); + io_cancel_local_task_work(ctx); } static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned int min_events) @@ -2334,7 +2333,7 @@ static __cold void io_ring_exit_work(struct work_struct *work) /* The SQPOLL thread never reaches this path */ do { if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) - io_move_task_work_from_local(ctx); + io_cancel_local_task_work(ctx); cond_resched(); } while (io_uring_try_cancel_requests(ctx, NULL, true, false)); @@ -2420,8 +2419,6 @@ static 
__cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) io_unregister_personality(ctx, index); mutex_unlock(&ctx->uring_lock); - flush_delayed_work(&ctx->fallback_work); - INIT_WORK(&ctx->exit_work, io_ring_exit_work); /* * Use system_dfl_wq to avoid spawning tons of event kworkers diff --git a/io_uring/tw.c b/io_uring/tw.c index a5ed57ec34e6a..e74372233f40b 100644 --- a/io_uring/tw.c +++ b/io_uring/tw.c @@ -16,24 +16,6 @@ #include "wait.h" #include "mpscq.h" -void io_fallback_req_func(struct work_struct *work) -{ - struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, - fallback_work.work); - struct llist_node *node = llist_del_all(&ctx->fallback_llist); - struct io_kiocb *req, *tmp; - struct io_tw_state ts = {}; - - percpu_ref_get(&ctx->refs); - mutex_lock(&ctx->uring_lock); - ts.cancel = io_should_terminate_tw(ctx); - llist_for_each_entry_safe(req, tmp, node, io_task_work.node) - req->io_task_work.func((struct io_tw_req){req}, ts); - io_submit_flush_completions(ctx); - mutex_unlock(&ctx->uring_lock); - percpu_ref_put(&ctx->refs); -} - static void ctx_flush_and_put(struct io_ring_ctx *ctx, io_tw_token_t tw) { if (!ctx) @@ -46,34 +28,6 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx, io_tw_token_t tw) percpu_ref_put(&ctx->refs); } -static __cold void __io_fallback_tw(struct llist_node *node, bool sync) -{ - struct io_ring_ctx *last_ctx = NULL; - struct io_kiocb *req; - - while (node) { - req = container_of(node, struct io_kiocb, io_task_work.node); - node = node->next; - if (last_ctx != req->ctx) { - if (last_ctx) { - if (sync) - flush_delayed_work(&last_ctx->fallback_work); - percpu_ref_put(&last_ctx->refs); - } - last_ctx = req->ctx; - percpu_ref_get(&last_ctx->refs); - } - if (llist_add(&req->io_task_work.node, &last_ctx->fallback_llist)) - schedule_delayed_work(&last_ctx->fallback_work, 1); - } - - if (last_ctx) { - if (sync) - flush_delayed_work(&last_ctx->fallback_work); - percpu_ref_put(&last_ctx->refs); - } -} - void 
io_tctx_fallback_work(struct work_struct *work) { struct io_uring_task *tctx = container_of(work, struct io_uring_task, @@ -286,29 +240,34 @@ void io_req_task_work_add_remote(struct io_kiocb *req, unsigned flags) __io_req_task_work_add(req, flags); } -void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx) +void __cold io_cancel_local_task_work(struct io_ring_ctx *ctx) { - struct llist_node *node, *first = NULL, **tail = &first; + struct io_tw_state ts = { .cancel = true }; + struct llist_node *node; /* * The work list consumer side is serialized by ->uring_lock, see * __io_run_local_work(). Grab it to guard against racing with normal - * task_work running, as the task may be exiting. + * task_work running, as the task may be exiting. The ring is going + * away, run the entries in cancel mode right here - the callers + * provide the same process context the per-ctx fallback work that + * they were previously punted to ran in. */ guard(mutex)(&ctx->uring_lock); while (!mpscq_empty(&ctx->work_list)) { + struct io_kiocb *req; + node = mpscq_pop(&ctx->work_list, &ctx->work_head); if (!node) { /* a producer is mid-push, wait for it to link */ - cpu_relax(); + cond_resched(); continue; } - *tail = node; - tail = &node->next; + req = container_of(node, struct io_kiocb, io_task_work.node); + req->io_task_work.func((struct io_tw_req){req}, ts); } - *tail = NULL; - __io_fallback_tw(first, false); + io_submit_flush_completions(ctx); } static bool io_run_local_work_continue(struct io_ring_ctx *ctx, int events, diff --git a/io_uring/tw.h b/io_uring/tw.h index 387e52004da80..3ade5ad577fda 100644 --- a/io_uring/tw.h +++ b/io_uring/tw.h @@ -30,8 +30,7 @@ void io_tctx_fallback_work(struct work_struct *work); int io_run_local_work(struct io_ring_ctx *ctx, int min_events, int max_events); int io_run_task_work_sig(struct io_ring_ctx *ctx); -__cold void io_fallback_req_func(struct work_struct *work); -__cold void io_move_task_work_from_local(struct io_ring_ctx *ctx); +__cold 
void io_cancel_local_task_work(struct io_ring_ctx *ctx); int io_run_local_work_locked(struct io_ring_ctx *ctx, int min_events); void io_req_local_work_add(struct io_kiocb *req, unsigned flags);