]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
xprtrdma: Close lost-wakeup race in xprt_rdma_alloc_slot
authorChuck Lever <chuck.lever@oracle.com>
Fri, 6 Mar 2026 21:56:24 +0000 (16:56 -0500)
committerTrond Myklebust <trond.myklebust@hammerspace.com>
Mon, 13 Apr 2026 18:56:20 +0000 (11:56 -0700)
xprt_rdma_alloc_slot() and xprt_rdma_free_slot() lack serialization
between the buffer pool and the backlog queue.  A buffer that is
freed after rpcrdma_buffer_get() has found the pool empty, but
before rpc_sleep_on() has placed the task on the backlog, is
returned to the pool with no waiter to wake, leaving the task
stuck on the backlog indefinitely.

After joining the backlog, re-check the pool and route any
recovered buffer through xprt_wake_up_backlog(), whose queue lock
serializes with concurrent wakeups and avoids double-assignment
of slots.

Because xprt_rdma_free_slot() does not hold reserve_lock, the
XPRT_CONGESTED double-check in xprt_throttle_congested() is
ineffective: a task can join the backlog through that path after
free_slot has already found the backlog empty and cleared the
bit.  Avoid this by using xprt_add_backlog_noncongested(), which
queues the task without setting XPRT_CONGESTED, so every
allocation reaches xprt_rdma_alloc_slot() and its post-sleep
re-check.

Fixes: edb41e61a54e ("xprtrdma: Make rpc_rqst part of rpcrdma_req")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
include/linux/sunrpc/xprt.h
net/sunrpc/xprt.c
net/sunrpc/xprtrdma/transport.c

index f46d1fb8f71ae22233f15691b2cf312f73656d9d..a82045804d34ed3995b1b4f58f0bc5595d9db16d 100644 (file)
@@ -404,6 +404,8 @@ struct rpc_xprt *   xprt_alloc(struct net *net, size_t size,
                                unsigned int max_req);
 void                   xprt_free(struct rpc_xprt *);
 void                   xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task);
+void                   xprt_add_backlog_noncongested(struct rpc_xprt *xprt,
+                                       struct rpc_task *task);
 bool                   xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req);
 void                   xprt_cleanup_ids(void);
 
index 4fbb57a2970488cef9ed17a90ed34e356d4480d5..48a3618cbb29f4700daf4b048eeb02cd8c3071a1 100644 (file)
@@ -1663,6 +1663,22 @@ void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
 }
 EXPORT_SYMBOL_GPL(xprt_add_backlog);
 
+/**
+ * xprt_add_backlog_noncongested - sleep @task on @xprt's backlog queue
+ * @xprt: transport whose backlog queue receives the task
+ * @task: task to put to sleep until a request slot becomes free
+ *
+ * Like xprt_add_backlog(), but leaves XPRT_CONGESTED clear.  For
+ * transports whose free_slot path does not take reserve_lock and
+ * therefore cannot rely on xprt_throttle_congested()'s double-check.
+ */
+void xprt_add_backlog_noncongested(struct rpc_xprt *xprt,
+                                  struct rpc_task *task)
+{
+       rpc_sleep_on(&xprt->backlog, task, xprt_complete_request_init);
+}
+EXPORT_SYMBOL_GPL(xprt_add_backlog_noncongested);
+
 static bool __xprt_set_rq(struct rpc_task *task, void *data)
 {
        struct rpc_rqst *req = data;
index ca079439f9cceb39f2b490dc9c89d99fc7fbdfd7..61706df5e485ac6d95e892a53ac56290a68291ae 100644 (file)
@@ -511,7 +511,20 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
 
 out_sleep:
        task->tk_status = -EAGAIN;
-       xprt_add_backlog(xprt, task);
+       xprt_add_backlog_noncongested(xprt, task);
+       /* A buffer freed between buffer_get and rpc_sleep_on
+        * goes back to the pool with no waiter to wake.
+        * Re-check after joining the backlog to close that gap.
+        */
+       req = rpcrdma_buffer_get(&r_xprt->rx_buf);
+       if (req) {
+               struct rpc_rqst *rqst = &req->rl_slot;
+
+               if (!xprt_wake_up_backlog(xprt, rqst)) {
+                       memset(rqst, 0, sizeof(*rqst));
+                       rpcrdma_buffer_put(&r_xprt->rx_buf, req);
+               }
+       }
 }
 
 /**