From: Chuck Lever Date: Thu, 4 Jun 2026 17:06:40 +0000 (-0400) Subject: xprtrdma: Return sendctx slot after Send preparation failure X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=60e7870052f417d83965db144f70ae21fcfcf37f;p=thirdparty%2Flinux.git xprtrdma: Return sendctx slot after Send preparation failure rpcrdma_prepare_send_sges() gets a sendctx before it maps the SGEs for the Send WR. If one of the mapping helpers fails, no Send WR is posted, so no Send completion is guaranteed to advance rb_sc_tail. Current cleanup clears sc_req so a later completion can sweep over that slot, but a consecutive run of preparation failures can still advance rb_sc_head until the ring appears full. At that point rpcrdma_sendctx_get_locked() returns NULL and no Send can be posted to produce the completion needed to recover the ring. The trigger requires CONFIG_SUNRPC_XPRT_RDMA and an NFS/RDMA mount. Mount setup and reliable DMA-map fault injection require local admin authority. Unprivileged I/O on an existing mount can exercise the send path, but a remote peer alone cannot force this local DMA-map failure. Add rpcrdma_sendctx_unget_locked() for the single-consumer send path to rewind rb_sc_head when the just-acquired sendctx is canceled before ib_post_send(). Wake waiters after making the slot available again. After the rewind, every slot the completion sweep visits belongs to a posted Send, so rpcrdma_sendctx_put_locked() no longer needs to test sc_req before unmapping. Fixes: ae72950abf99 ("xprtrdma: Add data structure to manage RDMA Send arguments") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 3f50828802de8..1285f04cdac14 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -747,6 +747,7 @@ inline int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdr, enum rpcrdma_chunktype rtype) { + struct rpcrdma_sendctx *sc; int ret; ret = -EAGAIN; @@ -789,7 +790,9 @@ inline int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, return 0; out_unmap: - rpcrdma_sendctx_cancel(req->rl_sendctx); + sc = req->rl_sendctx; + rpcrdma_sendctx_cancel(sc); + rpcrdma_sendctx_unget_locked(r_xprt, sc); out_nosc: trace_xprtrdma_prepsend_failed(&req->rl_slot, ret); return ret; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 8392ba4bcdcae..04b286223b24c 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -631,6 +631,11 @@ static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt) /* The QP is drained, but the final unsignaled Sends might not * have been walked by a signaled Send completion. Release those * Send owners before request buffers are reset. + * + * Unlike the completion sweep, this walk can visit slots with + * no Send posted: after a partial rpcrdma_sendctxs_create() + * failure on reconnect, rb_sc_head and rb_sc_tail are stale, + * and slots between them can be NULL or have sc_req clear. */ for (i = rpcrdma_sendctx_next(buf, buf->rb_sc_tail); i != rpcrdma_sendctx_next(buf, buf->rb_sc_head); @@ -703,6 +708,12 @@ static unsigned long rpcrdma_sendctx_next(struct rpcrdma_buffer *buf, return likely(item < buf->rb_sc_last) ? item + 1 : 0; } +static unsigned long rpcrdma_sendctx_prev(struct rpcrdma_buffer *buf, + unsigned long item) +{ + return item > 0 ? item - 1 : buf->rb_sc_last; +} + /** * rpcrdma_sendctx_get_locked - Acquire a send context * @r_xprt: controlling transport instance @@ -759,6 +770,29 @@ out_emptyq: return NULL; } +/** + * rpcrdma_sendctx_unget_locked - Release an unposted send context + * @r_xprt: controlling transport instance + * @sc: send context to release + * + * Usage: Called when no Send is posted for the sendctx most + * recently returned by rpcrdma_sendctx_get_locked(). + * + * The caller serializes calls to this function and to + * rpcrdma_sendctx_get_locked() (per transport). + */ +void rpcrdma_sendctx_unget_locked(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_sendctx *sc) +{ + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + + if (WARN_ON_ONCE(buf->rb_sc_ctxs[buf->rb_sc_head] != sc)) + return; + + buf->rb_sc_head = rpcrdma_sendctx_prev(buf, buf->rb_sc_head); + xprt_write_space(&r_xprt->rx_xprt); +} + /** * rpcrdma_sendctx_put_locked - Release a send context * @r_xprt: controlling transport instance @@ -776,8 +810,7 @@ static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, unsigned long next_tail; /* Release previously completed but unsignaled Sends by walking - * up the queue until @sc is found. Entries left behind by a - * failed rpcrdma_prepare_send_sges() have sc_req cleared. + * up the queue until @sc is found. */ next_tail = buf->rb_sc_tail; do { @@ -787,8 +820,7 @@ static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, /* ORDER: item must be accessed _before_ tail is updated */ cur = buf->rb_sc_ctxs[next_tail]; - if (cur->sc_req) - rpcrdma_sendctx_unmap(cur); + rpcrdma_sendctx_unmap(cur); } while (buf->rb_sc_ctxs[next_tail] != sc); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index ae036719f840c..4cbc941e4a3eb 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -495,6 +495,8 @@ void rpcrdma_req_destroy(struct rpcrdma_req *req); int rpcrdma_buffer_create(struct rpcrdma_xprt *); void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt); +void rpcrdma_sendctx_unget_locked(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_sendctx *sc); struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt); void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt);