From: Chuck Lever
Date: Fri, 22 May 2026 18:13:57 +0000 (-0400)
Subject: svcrdma: wake sq waiters when the transport closes
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e5248a7426030db1e126363f72afdb3b71339a5c;p=thirdparty%2Fkernel%2Flinux.git

svcrdma: wake sq waiters when the transport closes

Threads parked in svc_rdma_sq_wait() on sc_sq_ticket_wait or sc_send_wait can hang indefinitely in TASK_UNINTERRUPTIBLE state across transport teardown, pinning svc_xprt references and blocking svc_rdma_free().

The close path sets XPT_CLOSE before invoking xpo_detach and both wait_event predicates include an XPT_CLOSE term, but the predicates are re-evaluated only on wakeup. sc_sq_ticket_wait has no completion-driven wake path; it is advanced solely by the chained ticket handoff inside svc_rdma_sq_wait() itself. Without an explicit wake at close, parked threads never observe XPT_CLOSE, hold their svc_xprt_get reference forever, and svc_rdma_free() blocks on xpt_ref dropping to zero.

Two close entry points reach this transport. Local teardown runs svc_rdma_detach() from svc_handle_xprt() -> svc_delete_xprt() -> xpo_detach() on a worker thread. A remote disconnect arrives at svc_rdma_cma_handler(), which calls svc_xprt_deferred_close(): that sets XPT_CLOSE and enqueues the transport but does not access either RDMA waitqueue, so a worker already parked in svc_rdma_sq_wait() never re-evaluates its predicate. With every worker parked on this transport, no thread is available to run the local teardown either, and the wake site there is unreachable.

Introduce svc_rdma_xprt_deferred_close(), a thin svcrdma wrapper that calls svc_xprt_deferred_close() and then wakes both sc_sq_ticket_wait and sc_send_wait.
Convert the svcrdma producers that called svc_xprt_deferred_close() directly: svc_rdma_cma_handler(), qp_event_handler(), svc_rdma_post_send_err(), svc_rdma_wc_send(), the sendto drop path, the rw completion error paths, and the recvfrom flush and read-list error paths. Wake both waitqueues from svc_rdma_detach() as well. The synchronous svc_xprt_close() path (backchannel ENOTCONN, device removal via svc_rdma_xprt_done) reaches detach without flowing through svc_xprt_deferred_close() and therefore does not invoke the new helper. Fixes: ccc89b9d1ed2 ("svcrdma: Add fair queuing for Send Queue access") Cc: stable@vger.kernel.org Assisted-by: kres (claude-opus-4-7) Signed-off-by: Chris Mason [ cel: add svc_rdma_xprt_deferred_close() to complete the fix ] Signed-off-by: Chuck Lever --- diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 4ba39f07371df..5aadb47b3b0e9 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -328,6 +328,7 @@ extern int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset, unsigned int length); /* svc_rdma_transport.c */ +extern void svc_rdma_xprt_deferred_close(struct svcxprt_rdma *rdma); extern struct svc_xprt_class svc_rdma_class; #ifdef CONFIG_SUNRPC_BACKCHANNEL extern struct svc_xprt_class svc_rdma_bc_class; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 19503a12d0a21..fe9bf0371b6ec 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -383,7 +383,7 @@ flushed: trace_svcrdma_wc_recv_err(wc, &ctxt->rc_cid); dropped: svc_rdma_recv_ctxt_put(rdma, ctxt); - svc_xprt_deferred_close(&rdma->sc_xprt); + svc_rdma_xprt_deferred_close(rdma); } /** @@ -1010,7 +1010,7 @@ out_readlist: if (ret == -EINVAL) svc_rdma_send_error(rdma_xprt, ctxt, ret); svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); - svc_xprt_deferred_close(xprt); + svc_rdma_xprt_deferred_close(rdma_xprt); return ret; } 
return 0; diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 13554793b039d..f7fd22cc4a598 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -304,7 +304,7 @@ static void svc_rdma_reply_done(struct ib_cq *cq, struct ib_wc *wc) trace_svcrdma_wc_reply_err(wc, &cc->cc_cid); } - svc_xprt_deferred_close(&rdma->sc_xprt); + svc_rdma_xprt_deferred_close(rdma); } /** @@ -336,7 +336,7 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) * some of the outgoing RPC message. Signal the loss * to the client by closing the connection. */ - svc_xprt_deferred_close(&rdma->sc_xprt); + svc_rdma_xprt_deferred_close(rdma); } /** @@ -381,7 +381,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc) */ svc_rdma_cc_release(rdma, cc, DMA_FROM_DEVICE); svc_rdma_recv_ctxt_put(rdma, ctxt); - svc_xprt_deferred_close(&rdma->sc_xprt); + svc_rdma_xprt_deferred_close(rdma); } /* diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index eceefd21bec8e..7f6d17bf8c1fd 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -438,7 +438,7 @@ int svc_rdma_post_send_err(struct svcxprt_rdma *rdma, int sqecount, int ret) { trace_svcrdma_sq_post_err(rdma, cid, ret); - svc_xprt_deferred_close(&rdma->sc_xprt); + svc_rdma_xprt_deferred_close(rdma); /* If even one WR was posted, a Send completion will * return the reserved SQ slots. 
@@ -480,7 +480,7 @@ flushed: else trace_svcrdma_wc_send_flush(wc, &ctxt->sc_cid); svc_rdma_send_ctxt_put(rdma, ctxt); - svc_xprt_deferred_close(&rdma->sc_xprt); + svc_rdma_xprt_deferred_close(rdma); } /** @@ -1201,7 +1201,7 @@ put_ctxt: svc_rdma_send_ctxt_put(rdma, sctxt); drop_connection: trace_svcrdma_send_err(rqstp, ret); - svc_xprt_deferred_close(&rdma->sc_xprt); + svc_rdma_xprt_deferred_close(rdma); return -ENOTCONN; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index f99cd6177504b..7ca71741106b1 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -98,10 +98,27 @@ struct svc_xprt_class svc_rdma_class = { .xcl_ident = XPRT_TRANSPORT_RDMA, }; +/** + * svc_rdma_xprt_deferred_close - Close an RDMA transport (deferred) + * @rdma: transport to close + */ +void svc_rdma_xprt_deferred_close(struct svcxprt_rdma *rdma) +{ + svc_xprt_deferred_close(&rdma->sc_xprt); + + /* Release parked sc_sq_ticket_wait and sc_send_wait waiters. + * Once XPT_CLOSE is observed each returns -ENOTCONN. 
+ */ + wake_up_all(&rdma->sc_sq_ticket_wait); + wake_up_all(&rdma->sc_send_wait); +} + /* QP event handler */ static void qp_event_handler(struct ib_event *event, void *context) { struct svc_xprt *xprt = context; + struct svcxprt_rdma *rdma = + container_of(xprt, struct svcxprt_rdma, sc_xprt); trace_svcrdma_qp_error(event, (struct sockaddr *)&xprt->xpt_remote); switch (event->event) { @@ -119,7 +136,7 @@ static void qp_event_handler(struct ib_event *event, void *context) case IB_EVENT_QP_ACCESS_ERR: case IB_EVENT_DEVICE_FATAL: default: - svc_xprt_deferred_close(xprt); + svc_rdma_xprt_deferred_close(rdma); break; } } @@ -341,7 +358,7 @@ static int svc_rdma_cma_handler(struct rdma_cm_id *cma_id, svc_xprt_enqueue(xprt); break; case RDMA_CM_EVENT_DISCONNECTED: - svc_xprt_deferred_close(xprt); + svc_rdma_xprt_deferred_close(rdma); break; default: break; @@ -598,6 +615,15 @@ static void svc_rdma_detach(struct svc_xprt *xprt) container_of(xprt, struct svcxprt_rdma, sc_xprt); rdma_disconnect(rdma->sc_cm_id); + + /* + * Most close paths go through svc_rdma_xprt_deferred_close(), + * which wakes the SQ waitqueues. svc_xprt_close() reaches + * detach without that helper, so wake any threads parked in + * svc_rdma_sq_wait() here as well. + */ + wake_up_all(&rdma->sc_sq_ticket_wait); + wake_up_all(&rdma->sc_send_wait); } /**