]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
svcrdma: wake sq waiters when the transport closes
authorChuck Lever <chuck.lever@oracle.com>
Fri, 22 May 2026 18:13:57 +0000 (14:13 -0400)
committerChuck Lever <cel@kernel.org>
Tue, 9 Jun 2026 20:32:59 +0000 (16:32 -0400)
Threads parked in svc_rdma_sq_wait() on sc_sq_ticket_wait or
sc_send_wait can hang indefinitely in TASK_UNINTERRUPTIBLE state
across transport teardown, pinning svc_xprt references and
blocking svc_rdma_free().

The close path sets XPT_CLOSE before invoking xpo_detach and both
wait_event predicates include an XPT_CLOSE term, but the
predicates are re-evaluated only on wakeup. sc_sq_ticket_wait has
no completion-driven wake path; it is advanced solely by the
chained ticket handoff inside svc_rdma_sq_wait() itself. Without
an explicit wake at close, parked threads never observe
XPT_CLOSE, hold their svc_xprt_get reference forever, and
svc_rdma_free() blocks on xpt_ref dropping to zero.

Two close entry points reach this transport. Local teardown runs
svc_rdma_detach() from svc_handle_xprt() -> svc_delete_xprt() ->
xpo_detach() on a worker thread. A remote disconnect arrives at
svc_rdma_cma_handler(), which calls svc_xprt_deferred_close():
that sets XPT_CLOSE and enqueues the transport but does not
access either RDMA waitqueue, so a worker already parked in
svc_rdma_sq_wait() never re-evaluates its predicate. With every
worker parked on this transport, no thread is available to run
the local teardown either, and the wake site there is
unreachable.

Introduce svc_rdma_xprt_deferred_close(), a thin svcrdma wrapper
that calls svc_xprt_deferred_close() and then wakes both
sc_sq_ticket_wait and sc_send_wait. Convert the svcrdma producers
that called svc_xprt_deferred_close() directly:
svc_rdma_cma_handler(), qp_event_handler(),
svc_rdma_post_send_err(), svc_rdma_wc_send(), the sendto drop
path, the rw completion error paths, and the recvfrom flush and
read-list error paths.

Wake both waitqueues from svc_rdma_detach() as well. The
synchronous svc_xprt_close() path (backchannel ENOTCONN, device
removal via svc_rdma_xprt_done) reaches detach without flowing
through svc_xprt_deferred_close() and therefore does not invoke
the new helper.

Fixes: ccc89b9d1ed2 ("svcrdma: Add fair queuing for Send Queue access")
Cc: stable@vger.kernel.org
Assisted-by: kres (claude-opus-4-7)
Signed-off-by: Chris Mason <clm@meta.com>
[ cel: add svc_rdma_xprt_deferred_close() to complete the fix ]
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
include/linux/sunrpc/svc_rdma.h
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
net/sunrpc/xprtrdma/svc_rdma_rw.c
net/sunrpc/xprtrdma/svc_rdma_sendto.c
net/sunrpc/xprtrdma/svc_rdma_transport.c

index 4ba39f07371df8749259fd8d7aa163e93119de47..5aadb47b3b0e9aca35666657d28b2acd29616929 100644 (file)
@@ -328,6 +328,7 @@ extern int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset,
                                   unsigned int length);
 
 /* svc_rdma_transport.c */
+extern void svc_rdma_xprt_deferred_close(struct svcxprt_rdma *rdma);
 extern struct svc_xprt_class svc_rdma_class;
 #ifdef CONFIG_SUNRPC_BACKCHANNEL
 extern struct svc_xprt_class svc_rdma_bc_class;
index 19503a12d0a2150c2963ec2a62d53395aed6af31..fe9bf0371b6ec0b08d532a24bb623c6b4335123b 100644 (file)
@@ -383,7 +383,7 @@ flushed:
                trace_svcrdma_wc_recv_err(wc, &ctxt->rc_cid);
 dropped:
        svc_rdma_recv_ctxt_put(rdma, ctxt);
-       svc_xprt_deferred_close(&rdma->sc_xprt);
+       svc_rdma_xprt_deferred_close(rdma);
 }
 
 /**
@@ -1010,7 +1010,7 @@ out_readlist:
                if (ret == -EINVAL)
                        svc_rdma_send_error(rdma_xprt, ctxt, ret);
                svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
-               svc_xprt_deferred_close(xprt);
+               svc_rdma_xprt_deferred_close(rdma_xprt);
                return ret;
        }
        return 0;
index 13554793b039d364bd13197339264047b4eb52d2..f7fd22cc4a59825bf5cd000fb60be6a6d69ad19d 100644 (file)
@@ -304,7 +304,7 @@ static void svc_rdma_reply_done(struct ib_cq *cq, struct ib_wc *wc)
                trace_svcrdma_wc_reply_err(wc, &cc->cc_cid);
        }
 
-       svc_xprt_deferred_close(&rdma->sc_xprt);
+       svc_rdma_xprt_deferred_close(rdma);
 }
 
 /**
@@ -336,7 +336,7 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
         * some of the outgoing RPC message. Signal the loss
         * to the client by closing the connection.
         */
-       svc_xprt_deferred_close(&rdma->sc_xprt);
+       svc_rdma_xprt_deferred_close(rdma);
 }
 
 /**
@@ -381,7 +381,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
         */
        svc_rdma_cc_release(rdma, cc, DMA_FROM_DEVICE);
        svc_rdma_recv_ctxt_put(rdma, ctxt);
-       svc_xprt_deferred_close(&rdma->sc_xprt);
+       svc_rdma_xprt_deferred_close(rdma);
 }
 
 /*
index eceefd21bec8ed5571879447ff26149a5f191be7..7f6d17bf8c1fdf60df87e6395c2a87e38e884fa9 100644 (file)
@@ -438,7 +438,7 @@ int svc_rdma_post_send_err(struct svcxprt_rdma *rdma,
                           int sqecount, int ret)
 {
        trace_svcrdma_sq_post_err(rdma, cid, ret);
-       svc_xprt_deferred_close(&rdma->sc_xprt);
+       svc_rdma_xprt_deferred_close(rdma);
 
        /* If even one WR was posted, a Send completion will
         * return the reserved SQ slots.
@@ -480,7 +480,7 @@ flushed:
        else
                trace_svcrdma_wc_send_flush(wc, &ctxt->sc_cid);
        svc_rdma_send_ctxt_put(rdma, ctxt);
-       svc_xprt_deferred_close(&rdma->sc_xprt);
+       svc_rdma_xprt_deferred_close(rdma);
 }
 
 /**
@@ -1201,7 +1201,7 @@ put_ctxt:
        svc_rdma_send_ctxt_put(rdma, sctxt);
 drop_connection:
        trace_svcrdma_send_err(rqstp, ret);
-       svc_xprt_deferred_close(&rdma->sc_xprt);
+       svc_rdma_xprt_deferred_close(rdma);
        return -ENOTCONN;
 }
 
index f99cd6177504b880e42220c22d38ba3202b7f4b6..7ca71741106b1842a50ce7e3498e429c19b52745 100644 (file)
@@ -98,10 +98,27 @@ struct svc_xprt_class svc_rdma_class = {
        .xcl_ident = XPRT_TRANSPORT_RDMA,
 };
 
+/**
+ * svc_rdma_xprt_deferred_close - Close an RDMA transport (deferred)
+ * @rdma: transport to close
+ */
+void svc_rdma_xprt_deferred_close(struct svcxprt_rdma *rdma)
+{
+       svc_xprt_deferred_close(&rdma->sc_xprt);
+
+       /* Release parked sc_sq_ticket_wait and sc_send_wait waiters.
+        * Once XPT_CLOSE is observed each returns -ENOTCONN.
+        */
+       wake_up_all(&rdma->sc_sq_ticket_wait);
+       wake_up_all(&rdma->sc_send_wait);
+}
+
 /* QP event handler */
 static void qp_event_handler(struct ib_event *event, void *context)
 {
        struct svc_xprt *xprt = context;
+       struct svcxprt_rdma *rdma =
+               container_of(xprt, struct svcxprt_rdma, sc_xprt);
 
        trace_svcrdma_qp_error(event, (struct sockaddr *)&xprt->xpt_remote);
        switch (event->event) {
@@ -119,7 +136,7 @@ static void qp_event_handler(struct ib_event *event, void *context)
        case IB_EVENT_QP_ACCESS_ERR:
        case IB_EVENT_DEVICE_FATAL:
        default:
-               svc_xprt_deferred_close(xprt);
+               svc_rdma_xprt_deferred_close(rdma);
                break;
        }
 }
@@ -341,7 +358,7 @@ static int svc_rdma_cma_handler(struct rdma_cm_id *cma_id,
                svc_xprt_enqueue(xprt);
                break;
        case RDMA_CM_EVENT_DISCONNECTED:
-               svc_xprt_deferred_close(xprt);
+               svc_rdma_xprt_deferred_close(rdma);
                break;
        default:
                break;
@@ -598,6 +615,15 @@ static void svc_rdma_detach(struct svc_xprt *xprt)
                container_of(xprt, struct svcxprt_rdma, sc_xprt);
 
        rdma_disconnect(rdma->sc_cm_id);
+
+       /*
+        * Most close paths go through svc_rdma_xprt_deferred_close(),
+        * which wakes the SQ waitqueues. svc_xprt_close() reaches
+        * detach without that helper, so wake any threads parked in
+        * svc_rdma_sq_wait() here as well.
+        */
+       wake_up_all(&rdma->sc_sq_ticket_wait);
+       wake_up_all(&rdma->sc_send_wait);
 }
 
 /**