git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
RDMA/mlx4: Fix mis-use of RCU in mlx4_srq_event()
author Jason Gunthorpe <jgg@nvidia.com>
Tue, 28 Apr 2026 16:17:45 +0000 (13:17 -0300)
committer Jason Gunthorpe <jgg@nvidia.com>
Sat, 2 May 2026 18:30:48 +0000 (15:30 -0300)
Sashiko points out the radix_tree itself is RCU safe, but nothing ever
frees the mlx4_srq struct with RCU, and it isn't even accessed within the
RCU critical section. It also will crash if an event is delivered before
the srq object is finished initializing.

Use the spinlock, since it isn't easy to make RCU work; use
refcount_inc_not_zero() to protect against partially initialized objects;
and order the refcount_set() to come after the srq is fully initialized.

Cc: stable@vger.kernel.org
Fixes: 30353bfc43a1 ("net/mlx4_core: Use RCU to perform radix tree lookup for SRQ")
Link: https://sashiko.dev/#/patchset/0-v2-1c49eeb88c48%2B91-rdma_udata_rep_jgg%40nvidia.com?part=5
Link: https://patch.msgid.link/r/12-v1-41f3135e5565+9d2-rdma_ai_fixes1_jgg@nvidia.com
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/net/ethernet/mellanox/mlx4/srq.c

index dd890f5d7b725c58b36a3f33d34c6181ea621cff..8711689120f302edd9abfe7c6631fbd035ca87b8 100644 (file)
@@ -44,13 +44,14 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type)
 {
        struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
        struct mlx4_srq *srq;
+       unsigned long flags;
 
-       rcu_read_lock();
+       spin_lock_irqsave(&srq_table->lock, flags);
        srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1));
-       rcu_read_unlock();
-       if (srq)
-               refcount_inc(&srq->refcount);
-       else {
+       if (!srq || !refcount_inc_not_zero(&srq->refcount))
+               srq = NULL;
+       spin_unlock_irqrestore(&srq_table->lock, flags);
+       if (!srq) {
                mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn);
                return;
        }
@@ -203,8 +204,8 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd,
        if (err)
                goto err_radix;
 
-       refcount_set(&srq->refcount, 1);
        init_completion(&srq->free);
+       refcount_set_release(&srq->refcount, 1);
 
        return 0;