]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
RDMA/nldev: Fix locking when accessing mr->pd
authorJason Gunthorpe <jgg@nvidia.com>
Thu, 4 Jun 2026 01:27:43 +0000 (22:27 -0300)
committerJason Gunthorpe <jgg@nvidia.com>
Mon, 8 Jun 2026 17:32:43 +0000 (14:32 -0300)
Sashiko points out that, due to rereg_mr, the PD is actually variable and
all the touches in nldev are racy.

Use mr->device instead of mr->pd->device.

Getting the PD restrack ID is more tricky. To avoid disturbing all the
happy paths, add an rdma_restrack_sync() operation which is sort of like
flush_workqueue() or synchronize_irq(): after it returns, all the old
nldev touches to the mr are gone and everything sees the new PD. This
makes it safe to reach into the PD pointer.

Fixes: da5c85078215 ("RDMA/nldev: add driver-specific resource tracking")
Link: https://patch.msgid.link/r/4-v1-29ebd2c229b5+fd5-ib_mr_pd_jgg@nvidia.com
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/infiniband/core/nldev.c
drivers/infiniband/core/restrack.c
drivers/infiniband/core/restrack.h
drivers/infiniband/core/uverbs_cmd.c
include/rdma/ib_verbs.h

index 5aaba2b9746ba6ba82ef3e6485204ed924cab58f..02a0a9c0a4a6ad10589a0a419f932d47e7853146 100644 (file)
@@ -695,7 +695,7 @@ static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
                             struct rdma_restrack_entry *res, uint32_t port)
 {
        struct ib_mr *mr = container_of(res, struct ib_mr, res);
-       struct ib_device *dev = mr->pd->device;
+       struct ib_device *dev = mr->device;
 
        if (has_cap_net_admin) {
                if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
@@ -711,9 +711,12 @@ static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
                return -EMSGSIZE;
 
-       if (!rdma_is_kernel_res(res) &&
-           nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
-               return -EMSGSIZE;
+       if (!rdma_is_kernel_res(res)) {
+               struct ib_pd *pd = READ_ONCE(mr->pd);
+
+               if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, pd->res.id))
+                       return -EMSGSIZE;
+       }
 
        if (fill_res_name_pid(msg, res))
                return -EMSGSIZE;
@@ -727,7 +730,7 @@ static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
                                 struct rdma_restrack_entry *res, uint32_t port)
 {
        struct ib_mr *mr = container_of(res, struct ib_mr, res);
-       struct ib_device *dev = mr->pd->device;
+       struct ib_device *dev = mr->device;
 
        if (!dev->ops.fill_res_mr_entry_raw)
                return -EINVAL;
@@ -1017,7 +1020,7 @@ static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
                              struct rdma_restrack_entry *res, uint32_t port)
 {
        struct ib_mr *mr = container_of(res, struct ib_mr, res);
-       struct ib_device *dev = mr->pd->device;
+       struct ib_device *dev = mr->device;
 
        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
                goto err;
index ac3688952cabbff1ebb899bacb78421f2515231b..cfee2071586c16e3dfc4e21e5d58f8488dfb277f 100644 (file)
@@ -71,6 +71,8 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type,
 
        xa_lock(&rt->xa);
        xas_for_each(&xas, e, U32_MAX) {
+               if (xa_is_zero(e))
+                       continue;
                if (xa_get_mark(&rt->xa, e->id, RESTRACK_DD) && !show_details)
                        continue;
                cnt++;
@@ -276,6 +278,53 @@ int rdma_restrack_put(struct rdma_restrack_entry *res)
 }
 EXPORT_SYMBOL(rdma_restrack_put);
 
+/**
+ * rdma_restrack_sync() - Fence concurrent netlink dumps on an entry
+ * @res:  resource entry
+ *
+ * After this returns any concurrent netlink dump threads will see the current
+ * value of the object. This is useful if the object has to be changed and there
+ * is not locking to protect the nl side. Eg for mr->pd. This effectively
+ * destroys the object from a kref/xarray perspective and then immediately
+ * restores it. The kref is acting like a lock to barrier concurrent nl threads.
+ * Callers must ensure rdma_restrack_del() is not concurrently called.
+ */
+void rdma_restrack_sync(struct rdma_restrack_entry *res)
+{
+       struct rdma_restrack_entry *old;
+       struct rdma_restrack_root *rt;
+       struct task_struct *task;
+       struct ib_device *dev;
+
+       if (!res->valid || res->no_track)
+               return;
+
+       dev = res_to_dev(res);
+       if (WARN_ON(!dev))
+               return;
+
+       rt = &dev->res[res->type];
+       if (WARN_ON(xa_get_mark(&rt->xa, res->id, RESTRACK_DD)))
+               return;
+
+       old = xa_cmpxchg(&rt->xa, res->id, res, XA_ZERO_ENTRY, GFP_KERNEL);
+       if (WARN_ON(old != res))
+               return;
+
+       task = res->task;
+       if (task)
+               get_task_struct(task);
+       rdma_restrack_put(res);
+       wait_for_completion(&res->comp);
+       reinit_completion(&res->comp);
+       if (task)
+               res->task = task;
+       kref_init(&res->kref);
+
+       xa_cmpxchg(&rt->xa, res->id, XA_ZERO_ENTRY, res, GFP_KERNEL);
+}
+EXPORT_SYMBOL(rdma_restrack_sync);
+
 /**
  * rdma_restrack_del() - delete object from the resource tracking database
  * @res:  resource entry
index 6a04fc41f738010a90d96f88dbcc88bc36d3a289..75b8d1005a984b21896e296ac6ace1415a90905f 100644 (file)
@@ -27,6 +27,7 @@ int rdma_restrack_init(struct ib_device *dev);
 void rdma_restrack_clean(struct ib_device *dev);
 void rdma_restrack_add(struct rdma_restrack_entry *res);
 void rdma_restrack_del(struct rdma_restrack_entry *res);
+void rdma_restrack_sync(struct rdma_restrack_entry *res);
 void rdma_restrack_new(struct rdma_restrack_entry *res,
                       enum rdma_restrack_type type);
 void rdma_restrack_set_name(struct rdma_restrack_entry *res,
index e16eacf40d17af2edeb1d09c5a04b33d14625339..aca7c6ab55cd815222b278f3ff6f8bc91d7917fb 100644 (file)
@@ -47,6 +47,7 @@
 
 #include "uverbs.h"
 #include "core_priv.h"
+#include "restrack.h"
 
 /*
  * Copy a response to userspace. If the provided 'resp' is larger than the
@@ -808,6 +809,10 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
                        ret = PTR_ERR(new_pd);
                        goto put_uobjs;
                }
+               if (new_pd == orig_pd) {
+                       uobj_put_obj_read(new_pd);
+                       cmd.flags &= ~IB_MR_REREG_PD;
+               }
        } else {
                new_pd = mr->pd;
        }
@@ -855,9 +860,10 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
                mr = new_mr;
        } else {
                if (cmd.flags & IB_MR_REREG_PD) {
-                       atomic_dec(&orig_pd->usecnt);
-                       mr->pd = new_pd;
                        atomic_inc(&new_pd->usecnt);
+                       WRITE_ONCE(mr->pd, new_pd);
+                       rdma_restrack_sync(&mr->res);
+                       atomic_dec(&orig_pd->usecnt);
                }
                if (cmd.flags & IB_MR_REREG_TRANS) {
                        mr->iova = cmd.hca_va;
index 0daa5089d539fdf0b299e15dd67d4b9e8e328545..794746de8db0ba25e0b21a6744f6637b1d8430db 100644 (file)
@@ -1977,6 +1977,11 @@ struct ib_dmah {
 
 struct ib_mr {
        struct ib_device  *device;
+       /*
+        * Due to IB_MR_REREG_PD pd is not a fixed pointer and can change. For a
+        * user MR, this value should only be read from a system call that holds
+        * the uobject lock, or the driver should disable in-place REREG_PD.
+        */
        struct ib_pd      *pd;
        u32                lkey;
        u32                rkey;