]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
xprtrdma: Fix ep kref imbalance on ADDR_CHANGE
author: Chris Mason <clm@meta.com>
Thu, 4 Jun 2026 17:06:33 +0000 (13:06 -0400)
committer: Anna Schumaker <anna.schumaker@hammerspace.com>
Wed, 10 Jun 2026 19:47:06 +0000 (15:47 -0400)
rpcrdma_cm_event_handler() falls through to the disconnected: label
on RDMA_CM_EVENT_ADDR_CHANGE and calls rpcrdma_ep_put() with no
matching get when the event arrives before RDMA_CM_EVENT_ESTABLISHED.
The kref then underflows during connect teardown and
rpcrdma_xprt_disconnect() operates on a freed ep.

Reference counts across a normal connection lifecycle:

    rpcrdma_ep_create()             kref_init     ->1
    rpcrdma_xprt_connect()          ep_get        ->2  (before post_recvs)
    RDMA_CM_EVENT_ESTABLISHED       ep_get        ->3
    RDMA_CM_EVENT_DISCONNECTED      ep_put        ->2
    rpcrdma_xprt_drain()            ep_put        ->1
    rpcrdma_xprt_disconnect() tail  ep_put        ->0  (ep_destroy)

The connect-time get in rpcrdma_xprt_connect(), taken just before
rpcrdma_post_recvs() "while there are outstanding Receives," is
balanced by rpcrdma_xprt_drain(). ADDR_CHANGE before ESTABLISHED has
no get to consume, so its put drops the count to 1 and the drain
put then frees the ep while rpcrdma_xprt_disconnect() still holds a
pointer to it.

Fix by dispatching on the prior re_connect_status via xchg(): for
prev == 0 (pre-ESTABLISHED) wake the connect waiter and return with
no put; for prev == 1 call rpcrdma_force_disconnect() and return.
The case-1 arm relies on the subsequent RDMA_CM_EVENT_DISCONNECTED
event -- reliably delivered when rdma_disconnect() is called on a
still-connected cm_id -- to balance the ESTABLISHED get;
rpcrdma_xprt_drain() continues to balance only that connect-time
get. Any other prior value means teardown is already in flight.

Fixes: 2acc5cae2923 ("xprtrdma: Prevent dereferencing r_xprt->rx_ep after it is freed")
Assisted-by: kres:claude-opus-4-7
Signed-off-by: Chris Mason <clm@meta.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <anna.schumaker@hammerspace.com>
net/sunrpc/xprtrdma/verbs.c

index 993bc5c444a4bb1b8faa2953f900abc72d4f1fa4..7ddab4ed5d0366c01aa36cf0411ca8e70748e47b 100644 (file)
@@ -245,8 +245,17 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
                complete(&ep->re_done);
                return 0;
        case RDMA_CM_EVENT_ADDR_CHANGE:
-               ep->re_connect_status = -ENODEV;
-               goto disconnected;
+               switch (xchg(&ep->re_connect_status, -ENODEV)) {
+               case 0:
+                       goto wake_connect_worker;
+               case 1:
+                       /* The later DISCONNECTED event balances the
+                        * ESTABLISHED get; do not put here.
+                        */
+                       rpcrdma_force_disconnect(ep);
+                       return 0;
+               }
+               return 0;
        case RDMA_CM_EVENT_ESTABLISHED:
                rpcrdma_ep_get(ep);
                ep->re_connect_status = 1;
@@ -269,7 +278,6 @@ wake_connect_worker:
                return 0;
        case RDMA_CM_EVENT_DISCONNECTED:
                ep->re_connect_status = -ECONNABORTED;
-disconnected:
                rpcrdma_force_disconnect(ep);
                return rpcrdma_ep_put(ep);
        default: