]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
IB/cm: Fix av cm device leak on an error path in cm_init_av_by_path()
authorJason Gunthorpe <jgg@nvidia.com>
Tue, 2 Jun 2026 19:37:28 +0000 (16:37 -0300)
committerJason Gunthorpe <jgg@nvidia.com>
Fri, 5 Jun 2026 16:07:45 +0000 (13:07 -0300)
Codex pointed out that cm_init_av_by_path() can call cm_set_av_port()
which takes a reference on the cm device, but then can immediately return
error if ib_init_ah_attr_from_path() fails.

Since callers like ib_send_cm_req() put the av on the stack this leaks
that cm device reference.

Re-order cm_init_av_by_path() so it doesn't touch the av until it has done
all its failable work, and then update the av in one shot so it is either
left alone or fully init'd.

Sashiko also pointed out that the cm_destroy_av() prior to
cm_init_av_by_path() is harmful as it leaves the AV broken in the error
case and thus the REJ won't send. Since cm_init_av_by_path() is now atomic
it is safe to delete the cm_destroy_av(). On succees the av from
cm_init_av_for_response() is cleaned up by cm_init_av_by_path(), on
failure the 'goto rejected' guarentees the av is destroyed during
ib_destroy_cm_id().

Fixes: 76039ac9095f ("IB/cm: Protect cm_dev, cm_ports and mad_agent with kref and lock")
Link: https://patch.msgid.link/r/0-v1-38292501f539+14f-ib_cm_av_leak_jgg@nvidia.com
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/infiniband/core/cm.c

index 6ab9a0aee1ec606a31bbda0722fd7056a1300b66..1a2c2775b14d56b6de25570d7e2c195c557dc6ec 100644 (file)
@@ -530,6 +530,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path,
        struct rdma_ah_attr new_ah_attr;
        struct cm_device *cm_dev;
        struct cm_port *port;
+       u16 pkey_index;
        int ret;
 
        port = get_cm_port_from_path(path, sgid_attr);
@@ -538,12 +539,10 @@ static int cm_init_av_by_path(struct sa_path_rec *path,
        cm_dev = port->cm_dev;
 
        ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
-                                 be16_to_cpu(path->pkey), &av->pkey_index);
+                                 be16_to_cpu(path->pkey), &pkey_index);
        if (ret)
                return ret;
 
-       cm_set_av_port(av, port);
-
        /*
         * av->ah_attr might be initialized based on wc or during
         * request processing time which might have reference to sgid_attr.
@@ -558,6 +557,8 @@ static int cm_init_av_by_path(struct sa_path_rec *path,
        if (ret)
                return ret;
 
+       av->pkey_index = pkey_index;
+       cm_set_av_port(av, port);
        av->timeout = path->packet_life_time + 1;
        rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
        return 0;
@@ -2184,8 +2185,10 @@ static int cm_req_handler(struct cm_work *work)
                                 cm_id_priv->av.ah_attr.roce.dmac);
        work->path[0].hop_limit = grh->hop_limit;
 
-       /* This destroy call is needed to pair with cm_init_av_for_response */
-       cm_destroy_av(&cm_id_priv->av);
+       /*
+        * cm_init_av_by_path() will internally pair with the above
+        * cm_init_av_for_response() if it succeeds.
+        */
        ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av);
        if (ret) {
                int err;