Sashiko points out that, due to rereg_mr, the PD is actually variable and
all the touches in nldev are racy.
Use mr->device instead of mr->pd->device.
Getting the PD restrack ID is more tricky. To avoid disturbing all the
happy paths, add an rdma_restrack_sync() operation which is sort of like
flush_workqueue() or synchronize_irq(): after it returns, all the old
nldev touches to the mr are gone and everything sees the new PD. This
makes it safe to reach into the PD pointer.
Fixes: da5c85078215 ("RDMA/nldev: add driver-specific resource tracking")
Link: https://patch.msgid.link/r/4-v1-29ebd2c229b5+fd5-ib_mr_pd_jgg@nvidia.com
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
struct rdma_restrack_entry *res, uint32_t port)
{
struct ib_mr *mr = container_of(res, struct ib_mr, res);
- struct ib_device *dev = mr->pd->device;
+ struct ib_device *dev = mr->device;
if (has_cap_net_admin) {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
return -EMSGSIZE;
- if (!rdma_is_kernel_res(res) &&
- nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
- return -EMSGSIZE;
+ if (!rdma_is_kernel_res(res)) {
+ struct ib_pd *pd = READ_ONCE(mr->pd);
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, pd->res.id))
+ return -EMSGSIZE;
+ }
if (fill_res_name_pid(msg, res))
return -EMSGSIZE;
struct rdma_restrack_entry *res, uint32_t port)
{
struct ib_mr *mr = container_of(res, struct ib_mr, res);
- struct ib_device *dev = mr->pd->device;
+ struct ib_device *dev = mr->device;
if (!dev->ops.fill_res_mr_entry_raw)
return -EINVAL;
struct rdma_restrack_entry *res, uint32_t port)
{
struct ib_mr *mr = container_of(res, struct ib_mr, res);
- struct ib_device *dev = mr->pd->device;
+ struct ib_device *dev = mr->device;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
goto err;
xa_lock(&rt->xa);
xas_for_each(&xas, e, U32_MAX) {
+ if (xa_is_zero(e))
+ continue;
if (xa_get_mark(&rt->xa, e->id, RESTRACK_DD) && !show_details)
continue;
cnt++;
}
EXPORT_SYMBOL(rdma_restrack_put);
+/**
+ * rdma_restrack_sync() - Fence concurrent netlink dumps on an entry
+ * @res: resource entry
+ *
+ * After this returns any concurrent netlink dump threads will see the current
+ * value of the object. This is useful if the object has to be changed and there
+ * is not locking to protect the nl side. Eg for mr->pd. This effectively
+ * destroys the object from a kref/xarray perspective and then immediately
+ * restores it. The kref is acting like a lock to barrier concurrent nl threads.
+ * Callers must ensure rdma_restrack_del() is not concurrently called.
+ */
+void rdma_restrack_sync(struct rdma_restrack_entry *res)
+{
+ struct rdma_restrack_entry *old;
+ struct rdma_restrack_root *rt;
+ struct task_struct *task;
+ struct ib_device *dev;
+
+ if (!res->valid || res->no_track)
+ return;
+
+ dev = res_to_dev(res);
+ if (WARN_ON(!dev))
+ return;
+
+ rt = &dev->res[res->type];
+ if (WARN_ON(xa_get_mark(&rt->xa, res->id, RESTRACK_DD)))
+ return;
+
+ old = xa_cmpxchg(&rt->xa, res->id, res, XA_ZERO_ENTRY, GFP_KERNEL);
+ if (WARN_ON(old != res))
+ return;
+
+ task = res->task;
+ if (task)
+ get_task_struct(task);
+ rdma_restrack_put(res);
+ wait_for_completion(&res->comp);
+ reinit_completion(&res->comp);
+ if (task)
+ res->task = task;
+ kref_init(&res->kref);
+
+ xa_cmpxchg(&rt->xa, res->id, XA_ZERO_ENTRY, res, GFP_KERNEL);
+}
+EXPORT_SYMBOL(rdma_restrack_sync);
+
/**
* rdma_restrack_del() - delete object from the resource tracking database
* @res: resource entry
void rdma_restrack_clean(struct ib_device *dev);
void rdma_restrack_add(struct rdma_restrack_entry *res);
void rdma_restrack_del(struct rdma_restrack_entry *res);
+void rdma_restrack_sync(struct rdma_restrack_entry *res);
void rdma_restrack_new(struct rdma_restrack_entry *res,
enum rdma_restrack_type type);
void rdma_restrack_set_name(struct rdma_restrack_entry *res,
#include "uverbs.h"
#include "core_priv.h"
+#include "restrack.h"
/*
* Copy a response to userspace. If the provided 'resp' is larger than the
ret = PTR_ERR(new_pd);
goto put_uobjs;
}
+ if (new_pd == orig_pd) {
+ uobj_put_obj_read(new_pd);
+ cmd.flags &= ~IB_MR_REREG_PD;
+ }
} else {
new_pd = mr->pd;
}
mr = new_mr;
} else {
if (cmd.flags & IB_MR_REREG_PD) {
- atomic_dec(&orig_pd->usecnt);
- mr->pd = new_pd;
atomic_inc(&new_pd->usecnt);
+ WRITE_ONCE(mr->pd, new_pd);
+ rdma_restrack_sync(&mr->res);
+ atomic_dec(&orig_pd->usecnt);
}
if (cmd.flags & IB_MR_REREG_TRANS) {
mr->iova = cmd.hca_va;
struct ib_mr {
struct ib_device *device;
+ /*
+ * Due to IB_MR_REREG_PD pd is not a fixed pointer and can change. For a
+ * user MR, this value should only be read from a system call that holds
+ * the uobject lock, or the driver should disable in-place REREG_PD.
+ */
struct ib_pd *pd;
u32 lkey;
u32 rkey;