io_uring/tctx: add separate lock for list of tctx's in ctx

author Jens Axboe <axboe@kernel.dk>
Wed, 31 Dec 2025 15:12:46 +0000 (08:12 -0700)
committer Jens Axboe <axboe@kernel.dk>
Thu, 1 Jan 2026 15:16:40 +0000 (08:16 -0700)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h

index e1adb0d20a0af2131c145f3f69efadffb34b47cd..a3e8ddc9b380f8493eca522f77799e7f98c5156b 100644 (file)
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -424,11 +424,17 @@ struct io_ring_ctx {
         struct user_struct              *user;
         struct mm_struct                *mm_account;
  
+       /*
+        * List of tctx nodes for this ctx, protected by tctx_lock. For
+        * cancelation purposes, nests under uring_lock.
+        */
+       struct list_head                tctx_list;
+       struct mutex                    tctx_lock;
+
         /* ctx exit and cancelation */
         struct llist_head               fallback_llist;
         struct delayed_work             fallback_work;
         struct work_struct              exit_work;
-       struct list_head                tctx_list;
         struct completion               ref_comp;
  
         /* io-wq management, e.g. thread count */
diff --git a/io_uring/cancel.c b/io_uring/cancel.c

index ca12ac10c0ae9e7da128a5e983c5dfa8504be4da..07b8d852218b116c7364cc261979329b347d8dab 100644 (file)
--- a/io_uring/cancel.c
+++ b/io_uring/cancel.c
@@ -184,7 +184,9 @@ static int __io_async_cancel(struct io_cancel_data *cd,
         } while (1);
  
         /* slow path, try all io-wq's */
+       __set_current_state(TASK_RUNNING);
         io_ring_submit_lock(ctx, issue_flags);
+       mutex_lock(&ctx->tctx_lock);
         ret = -ENOENT;
         list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
                 ret = io_async_cancel_one(node->task->io_uring, cd);
@@ -194,6 +196,7 @@ static int __io_async_cancel(struct io_cancel_data *cd,
                         nr++;
                 }
         }
+       mutex_unlock(&ctx->tctx_lock);
         io_ring_submit_unlock(ctx, issue_flags);
         return all ? nr : ret;
  }
@@ -484,6 +487,7 @@ static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx)
         bool ret = false;
  
         mutex_lock(&ctx->uring_lock);
+       mutex_lock(&ctx->tctx_lock);
         list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
                 struct io_uring_task *tctx = node->task->io_uring;
  
@@ -496,6 +500,7 @@ static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx)
                 cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb, ctx, true);
                 ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
         }
+       mutex_unlock(&ctx->tctx_lock);
         mutex_unlock(&ctx->uring_lock);
  
         return ret;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c

index 709943fedaf40f0734ade310600ce9d47576c38a..87a87396e9409f1bb1e53206cba0d4b914745e52 100644 (file)
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -340,6 +340,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
         INIT_LIST_HEAD(&ctx->ltimeout_list);
         init_llist_head(&ctx->work_llist);
         INIT_LIST_HEAD(&ctx->tctx_list);
+       mutex_init(&ctx->tctx_lock);
         ctx->submit_state.free_list.next = NULL;
         INIT_HLIST_HEAD(&ctx->waitid_list);
         xa_init_flags(&ctx->zcrx_ctxs, XA_FLAGS_ALLOC);
@@ -3045,6 +3046,7 @@ static __cold void io_ring_exit_work(struct work_struct *work)
         exit.ctx = ctx;
  
         mutex_lock(&ctx->uring_lock);
+       mutex_lock(&ctx->tctx_lock);
         while (!list_empty(&ctx->tctx_list)) {
                 WARN_ON_ONCE(time_after(jiffies, timeout));
  
@@ -3056,6 +3058,7 @@ static __cold void io_ring_exit_work(struct work_struct *work)
                 if (WARN_ON_ONCE(ret))
                         continue;
  
+               mutex_unlock(&ctx->tctx_lock);
                 mutex_unlock(&ctx->uring_lock);
                 /*
                  * See comment above for
@@ -3064,7 +3067,9 @@ static __cold void io_ring_exit_work(struct work_struct *work)
                  */
                 wait_for_completion_interruptible(&exit.completion);
                 mutex_lock(&ctx->uring_lock);
+               mutex_lock(&ctx->tctx_lock);
         }
+       mutex_unlock(&ctx->tctx_lock);
         mutex_unlock(&ctx->uring_lock);
         spin_lock(&ctx->completion_lock);
         spin_unlock(&ctx->completion_lock);
diff --git a/io_uring/register.c b/io_uring/register.c

index 62d39b3ff317e7b0540152b5c6f019bfab0d66fd..3d3822ff3fd9e1bddb0b4ad02f2e2eda3b0cc114 100644 (file)
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -320,6 +320,7 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
                 return 0;
  
         /* now propagate the restriction to all registered users */
+       mutex_lock(&ctx->tctx_lock);
         list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
                 tctx = node->task->io_uring;
                 if (WARN_ON_ONCE(!tctx->io_wq))
@@ -330,6 +331,7 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
                 /* ignore errors, it always returns zero anyway */
                 (void)io_wq_max_workers(tctx->io_wq, new_count);
         }
+       mutex_unlock(&ctx->tctx_lock);
         return 0;
  err:
         if (sqd) {
diff --git a/io_uring/tctx.c b/io_uring/tctx.c

index 5b66755579c08fac74b77b3f76e51bdd6f5abf35..6d6f44215ec8077f21ba944a94ca4f8255fde738 100644 (file)
--- a/io_uring/tctx.c
+++ b/io_uring/tctx.c
@@ -136,9 +136,9 @@ int __io_uring_add_tctx_node(struct io_ring_ctx *ctx)
                         return ret;
                 }
  
-               mutex_lock(&ctx->uring_lock);
+               mutex_lock(&ctx->tctx_lock);
                 list_add(&node->ctx_node, &ctx->tctx_list);
-               mutex_unlock(&ctx->uring_lock);
+               mutex_unlock(&ctx->tctx_lock);
         }
         return 0;
  }
@@ -176,9 +176,9 @@ __cold void io_uring_del_tctx_node(unsigned long index)
         WARN_ON_ONCE(current != node->task);
         WARN_ON_ONCE(list_empty(&node->ctx_node));
  
-       mutex_lock(&node->ctx->uring_lock);
+       mutex_lock(&node->ctx->tctx_lock);
         list_del(&node->ctx_node);
-       mutex_unlock(&node->ctx->uring_lock);
+       mutex_unlock(&node->ctx->tctx_lock);
  
         if (tctx->last == node->ctx)
                 tctx->last = NULL;
author	Jens Axboe <axboe@kernel.dk>
	Wed, 31 Dec 2025 15:12:46 +0000 (08:12 -0700)
committer	Jens Axboe <axboe@kernel.dk>
	Thu, 1 Jan 2026 15:16:40 +0000 (08:16 -0700)
include/linux/io_uring_types.h		patch | blob | blame | history
io_uring/cancel.c		patch | blob | blame | history
io_uring/io_uring.c		patch | blob | blame | history
io_uring/register.c		patch | blob | blame | history
io_uring/tctx.c		patch | blob | blame | history