git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
RDMA/mlx5: Use UMEM attribute for CQ doorbell record
author Jiri Pirko <jiri@nvidia.com>
Fri, 29 May 2026 13:43:11 +0000 (15:43 +0200)
committer Jason Gunthorpe <jgg@nvidia.com>
Fri, 29 May 2026 23:19:59 +0000 (20:19 -0300)
Add an optional mlx5 driver-namespace UMEM attribute on CQ
create so userspace can supply the doorbell record buffer
explicitly. mlx5_ib_db_map_user() resolves the attribute (or
falls back to the legacy UHW VA) into a struct
ib_uverbs_buffer_desc and runs a unified lookup-then-pin:
VA-typed descriptors share a per-page umem across CQ/QP/SRQ
in the same process, FD-typed descriptors are pinned per call.

Link: https://patch.msgid.link/r/20260529134312.2836341-16-jiri@resnulli.us
Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/doorbell.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/mlx5/srq.c
include/uapi/rdma/mlx5_user_ioctl_cmds.h

index e8f8fcc106c820fa097b952be5ee385286aedee8..49b4bf148a4a017079c93a9b267690bf3a23169a 100644 (file)
@@ -761,7 +761,9 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
                goto err_umem;
        }
 
-       err = mlx5_ib_db_map_user(context, ucmd.db_addr, &cq->db);
+       err = mlx5_ib_db_map_user(context, attrs,
+                                 MLX5_IB_ATTR_CREATE_CQ_DBR_BUF_UMEM,
+                                 ucmd.db_addr, &cq->db);
        if (err)
                goto err_umem;
 
@@ -1521,7 +1523,9 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE(
        UVERBS_ATTR_PTR_IN(
                MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX,
                UVERBS_ATTR_TYPE(u32),
-               UA_OPTIONAL));
+               UA_OPTIONAL),
+       UVERBS_ATTR_UMEM(MLX5_IB_ATTR_CREATE_CQ_DBR_BUF_UMEM,
+                        UA_OPTIONAL));
 
 const struct uapi_definition mlx5_ib_create_cq_defs[] = {
        UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_CQ, &mlx5_ib_cq_create),
index 020c7032866342a712e66ec189abfbded9402c7e..3108894534a8a40fbcd68a96bf7c93ed4376974f 100644 (file)
 
 #include "mlx5_ib.h"
 
+#define MLX5_IB_DBR_SIZE (sizeof(__be32) * 2)
+
 struct mlx5_ib_user_db_page {
        struct list_head        list;
        struct ib_umem         *umem;
-       unsigned long           user_virt;
+       struct ib_uverbs_buffer_desc desc;
        int                     refcnt;
        struct mm_struct        *mm;
 };
 
-int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
-                       struct mlx5_db *db)
+static int mlx5_ib_db_map_user_desc(struct mlx5_ib_ucontext *context,
+                                   const struct ib_uverbs_buffer_desc *desc,
+                                   struct mlx5_db *db)
 {
        struct mlx5_ib_user_db_page *page;
+       struct ib_umem *umem;
        int err = 0;
 
+       if (desc->length < MLX5_IB_DBR_SIZE)
+               return -EINVAL;
+       /*
+        * For VA descriptors the umem is normalized to a single PAGE_SIZE
+        * region, so reject offsets that would place the 8-byte DBR
+        * straddling the page boundary.
+        */
+       if (desc->type == IB_UVERBS_BUFFER_TYPE_VA &&
+           (desc->addr & ~PAGE_MASK) > PAGE_SIZE - MLX5_IB_DBR_SIZE)
+               return -EINVAL;
+
        mutex_lock(&context->db_page_mutex);
 
-       list_for_each_entry(page, &context->db_page_list, list)
-               if ((current->mm == page->mm) &&
-                   (page->user_virt == (virt & PAGE_MASK)))
-                       goto found;
+       /*
+        * Only VA-typed descriptors are eligible to share a per-page
+        * doorbell umem; FD-typed descriptors are pinned individually.
+        */
+       if (desc->type == IB_UVERBS_BUFFER_TYPE_VA) {
+               list_for_each_entry(page, &context->db_page_list, list) {
+                       if (current->mm != page->mm)
+                               continue;
+                       if (page->desc.addr == (desc->addr & PAGE_MASK))
+                               goto found;
+               }
+       }
 
-       page = kmalloc_obj(*page);
+       page = kzalloc_obj(*page);
        if (!page) {
                err = -ENOMEM;
                goto out;
        }
 
-       page->user_virt = (virt & PAGE_MASK);
-       page->refcnt    = 0;
-       page->umem = ib_umem_get_va(context->ibucontext.device,
-                                   virt & PAGE_MASK, PAGE_SIZE, 0);
-       if (IS_ERR(page->umem)) {
-               err = PTR_ERR(page->umem);
+       page->desc = *desc;
+
+       /*
+        * Normalize VA descriptors to a page-aligned PAGE_SIZE region so
+        * multiple DBRs that fall in the same user page share one umem.
+        */
+       if (page->desc.type == IB_UVERBS_BUFFER_TYPE_VA) {
+               page->desc.addr &= PAGE_MASK;
+               page->desc.length = PAGE_SIZE;
+       }
+
+       umem = ib_umem_get_desc(context->ibucontext.device, &page->desc, 0);
+       if (IS_ERR(umem)) {
+               err = PTR_ERR(umem);
+               kfree(page);
+               goto out;
+       }
+
+       /*
+        * The 8-byte DBR is programmed to the device as one DMA address,
+        * so it must live in a single contiguous DMA segment.
+        */
+       if (!ib_umem_is_contiguous(umem)) {
+               ib_umem_release(umem);
                kfree(page);
+               err = -EINVAL;
                goto out;
        }
-       mmgrab(current->mm);
-       page->mm = current->mm;
 
+       page->umem = umem;
+       if (page->desc.type == IB_UVERBS_BUFFER_TYPE_VA) {
+               mmgrab(current->mm);
+               page->mm = current->mm;
+       }
        list_add(&page->list, &context->db_page_list);
 
 found:
        db->dma = sg_dma_address(page->umem->sgt_append.sgt.sgl) +
-                 (virt & ~PAGE_MASK);
+                 (desc->addr & ~PAGE_MASK);
        db->u.user_page = page;
        ++page->refcnt;
 
@@ -90,13 +135,35 @@ out:
        return err;
 }
 
+int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context,
+                       const struct uverbs_attr_bundle *attrs, u16 attr_id,
+                       unsigned long virt, struct mlx5_db *db)
+{
+       struct ib_uverbs_buffer_desc desc = {
+               .type = IB_UVERBS_BUFFER_TYPE_VA,
+               .addr = virt,
+               .length = MLX5_IB_DBR_SIZE,
+       };
+
+       if (attrs) {
+               int err;
+
+               err = uverbs_get_buffer_desc(attrs, attr_id, &desc);
+               if (err && err != -ENOENT)
+                       return err;
+       }
+
+       return mlx5_ib_db_map_user_desc(context, &desc, db);
+}
+
 void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db)
 {
        mutex_lock(&context->db_page_mutex);
 
        if (!--db->u.user_page->refcnt) {
                list_del(&db->u.user_page->list);
-               mmdrop(db->u.user_page->mm);
+               if (db->u.user_page->mm)
+                       mmdrop(db->u.user_page->mm);
                ib_umem_release(db->u.user_page->umem);
                kfree(db->u.user_page);
        }
index e156dc4d752996cc4ae465bb567b0c1305d07fed..078f281bcdac920e0363cb7c907e24b3df2c94dc 100644 (file)
@@ -1259,8 +1259,9 @@ to_mmmap(struct rdma_user_mmap_entry *rdma_entry)
 
 int mlx5_ib_dev_res_cq_init(struct mlx5_ib_dev *dev);
 int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev);
-int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
-                       struct mlx5_db *db);
+int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context,
+                       const struct uverbs_attr_bundle *attrs, u16 attr_id,
+                       unsigned long virt, struct mlx5_db *db);
 void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db);
 void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
 void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
index dd56118005313d88f8d411f54cde8462e33b4ba6..58997714df700ae0f73ab579f02e509f4cd1d432 100644 (file)
@@ -914,7 +914,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                ib_umem_num_pages(rwq->umem), page_size, rwq->rq_num_pas,
                offset);
 
-       err = mlx5_ib_db_map_user(ucontext, ucmd->db_addr, &rwq->db);
+       err = mlx5_ib_db_map_user(ucontext, NULL, 0, ucmd->db_addr, &rwq->db);
        if (err) {
                mlx5_ib_dbg(dev, "map failed\n");
                goto err_umem;
@@ -1053,7 +1053,7 @@ static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
                resp->bfreg_index = MLX5_IB_INVALID_BFREG;
        qp->bfregn = bfregn;
 
-       err = mlx5_ib_db_map_user(context, ucmd->db_addr, &qp->db);
+       err = mlx5_ib_db_map_user(context, NULL, 0, ucmd->db_addr, &qp->db);
        if (err) {
                mlx5_ib_dbg(dev, "map failed\n");
                goto err_free;
index 44903015c6c9e262412eb42dffefcd1658e07aec..5bc48fef374432818f899126ccc821b256281f73 100644 (file)
@@ -74,7 +74,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
        }
        in->umem = srq->umem;
 
-       err = mlx5_ib_db_map_user(ucontext, ucmd.db_addr, &srq->db);
+       err = mlx5_ib_db_map_user(ucontext, NULL, 0, ucmd.db_addr, &srq->db);
        if (err) {
                mlx5_ib_dbg(dev, "map doorbell failed\n");
                goto err_umem;
index 01a2a050e4689f31024ba7df75de86072c2b97af..b63e75034cda2f8b32845e9980c5bfbfc4eb5d1a 100644 (file)
@@ -274,6 +274,7 @@ enum mlx5_ib_device_query_context_attrs {
 
 enum mlx5_ib_create_cq_attrs {
        MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX = UVERBS_ID_DRIVER_NS_WITH_UHW,
+       MLX5_IB_ATTR_CREATE_CQ_DBR_BUF_UMEM,
 };
 
 enum mlx5_ib_reg_dmabuf_mr_attrs {