From: Jens Axboe Date: Mon, 15 Jun 2026 19:43:16 +0000 (-0600) Subject: io_uring: get rid of tw_pending for !DEFER task work X-Git-Tag: v7.2-rc1~32^2~4 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=ca4aa97194ae353c2882d7cb4ed123a544892bcf;p=thirdparty%2Flinux.git io_uring: get rid of tw_pending for !DEFER task work The normal task_work path used a tw_pending bit to ensure the callback was only added once: the mpscq drains incrementally, so a single tctx_task_work() run can take the queue through empty -> non-empty several times, and each transition would otherwise re-add the already pending callback_head. This corrupts the task_work list, and is what tw_pending protects against. This can go away if we stop running the task_work as soon as the queue empties. Suggested-by: Caleb Sander Mateos Reviewed-by: Caleb Sander Mateos Signed-off-by: Jens Axboe --- diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 6415a3353ee0e..87151a5b62c1b 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -149,8 +149,6 @@ struct io_uring_task { struct { /* task_work */ struct mpscq task_list; - /* BIT(0) guards adding tw only once */ - unsigned long tw_pending; struct callback_head task_work; } ____cacheline_aligned_in_smp; }; diff --git a/io_uring/mpscq.h b/io_uring/mpscq.h index c801384c6a0aa..f910526766fd8 100644 --- a/io_uring/mpscq.h +++ b/io_uring/mpscq.h @@ -122,4 +122,13 @@ static inline struct llist_node *mpscq_pop(struct mpscq *q, return NULL; } +/* + * Returns true if the most recent mpscq_pop() that returned a node also + * emptied the queue. Consumer must be serialized. 
+ */ +static inline bool mpscq_pop_emptied(struct mpscq *q, struct llist_node *head) +{ + return head == &q->stub; +} + #endif /* IOU_MPSCQ_H */ diff --git a/io_uring/tw.c b/io_uring/tw.c index e74372233f40b..f2ce806b01a1e 100644 --- a/io_uring/tw.c +++ b/io_uring/tw.c @@ -34,10 +34,6 @@ void io_tctx_fallback_work(struct work_struct *work) fallback_work); unsigned int count = 0; - /* see tctx_task_work() - a set bit must always have a run coming */ - clear_bit(0, &tctx->tw_pending); - smp_mb__after_atomic(); - /* * Run the entries directly. We're in PF_KTHRED context, hence * io_should_terminate_tw() is true and they will be marked as @@ -101,6 +97,13 @@ void tctx_task_work_run(struct io_uring_task *tctx, unsigned int max_entries, io_poll_task_func, io_req_rw_complete, (struct io_tw_req){req}, ts); (*count)++; + /* + * Break if most recent pop emptied the queue. This helps + * bound task_work run, and also protects the regular + * task_work addition. + */ + if (mpscq_pop_emptied(&tctx->task_list, tctx->task_head)) + break; if (unlikely(need_resched())) { ctx_flush_and_put(ctx, ts); ctx = NULL; @@ -127,8 +130,6 @@ void tctx_task_work(struct callback_head *cb) unsigned int count = 0; tctx = container_of(cb, struct io_uring_task, task_work); - clear_bit(0, &tctx->tw_pending); - smp_mb__after_atomic(); tctx_task_work_run(tctx, UINT_MAX, &count); } @@ -206,7 +207,7 @@ void io_req_normal_work_add(struct io_kiocb *req) struct io_uring_task *tctx = req->tctx; struct io_ring_ctx *ctx = req->ctx; - /* task_work already pending, we're done */ + /* tw run already pending, nothing else to do */ if (!mpscq_push(&tctx->task_list, &req->io_task_work.node)) return; @@ -223,10 +224,6 @@ void io_req_normal_work_add(struct io_kiocb *req) return; } - /* task_work must only be added once */ - if (test_and_set_bit(0, &tctx->tw_pending)) - return; - if (likely(!task_work_add(tctx->task, &tctx->task_work, ctx->notify_method))) return;