From: Michael Guralnik Date: Wed, 10 Jun 2026 00:01:43 +0000 (+0300) Subject: RDMA/core: Fix FRMR handle leak on push failure X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=8c76126b866649d8e8acc09a06f2b03b6ff88900;p=thirdparty%2Flinux.git RDMA/core: Fix FRMR handle leak on push failure When pushing a handle to the pool fails, caused by ENOMEM on queue page allocation, the in_use counter update is skipped, which leaves the pool state skewed indefinitely. Fix this by moving the destruction of the handle in that case into the FRMR code, ensuring the handle is either pushed to the pool or destroyed inside the same function. Adjust the mlx5_ib call site accordingly. Fixes: ce5df0b891ed ("IB/core: Introduce FRMR pools") Link: https://patch.msgid.link/r/20260610000145.820592-8-michaelgur@nvidia.com Signed-off-by: Michael Guralnik Signed-off-by: Jason Gunthorpe --- diff --git a/drivers/infiniband/core/frmr_pools.c b/drivers/infiniband/core/frmr_pools.c index 892aedfe03bed..e214a8273df84 100644 --- a/drivers/infiniband/core/frmr_pools.c +++ b/drivers/infiniband/core/frmr_pools.c @@ -549,9 +549,8 @@ EXPORT_SYMBOL(ib_frmr_pool_pop); * @device: The device to push the FRMR handle to. * @mr: The MR containing the FRMR handle to push back to the pool. * - * Returns 0 on success, negative error code on failure. 
*/ -int ib_frmr_pool_push(struct ib_device *device, struct ib_mr *mr) +void ib_frmr_pool_push(struct ib_device *device, struct ib_mr *mr) { struct ib_frmr_pool *pool = mr->frmr.pool; struct ib_frmr_pools *pools = device->frmr_pools; @@ -559,19 +558,23 @@ int ib_frmr_pool_push(struct ib_device *device, struct ib_mr *mr) int ret; spin_lock(&pool->lock); + pool->in_use--; + ret = push_handle_to_queue_locked(&pool->queue, mr->frmr.handle); + /* Schedule aging every time an empty pool becomes non-empty */ - if (pool->queue.ci == 0) + if (!ret && pool->queue.ci == 1) schedule_aging = true; - ret = push_handle_to_queue_locked(&pool->queue, mr->frmr.handle); - if (ret == 0) - pool->in_use--; spin_unlock(&pool->lock); - if (ret == 0 && schedule_aging) + if (ret) { + pools->pool_ops->destroy_frmrs(device, &mr->frmr.handle, 1); + return; + } + + if (schedule_aging) queue_delayed_work(pools->aging_wq, &pool->aging_work, secs_to_jiffies(READ_ONCE(pools->aging_period_sec))); - return ret; } EXPORT_SYMBOL(ib_frmr_pool_push); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 6e7de9d2f0bd1..46cbdc86321f4 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1398,9 +1398,10 @@ static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr) bool is_odp = is_odp_mr(mr); int ret; - if (mr->ibmr.frmr.pool && !mlx5_umr_revoke_mr_with_lock(mr) && - !ib_frmr_pool_push(mr->ibmr.device, &mr->ibmr)) + if (mr->ibmr.frmr.pool && !mlx5_umr_revoke_mr_with_lock(mr)) { + ib_frmr_pool_push(mr->ibmr.device, &mr->ibmr); return 0; + } if (is_odp) mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); diff --git a/include/rdma/frmr_pools.h b/include/rdma/frmr_pools.h index af1b88801fa46..5b57bafa36366 100644 --- a/include/rdma/frmr_pools.h +++ b/include/rdma/frmr_pools.h @@ -34,6 +34,6 @@ int ib_frmr_pools_init(struct ib_device *device, const struct ib_frmr_pool_ops *pool_ops); void ib_frmr_pools_cleanup(struct ib_device *device); int 
ib_frmr_pool_pop(struct ib_device *device, struct ib_mr *mr); -int ib_frmr_pool_push(struct ib_device *device, struct ib_mr *mr); +void ib_frmr_pool_push(struct ib_device *device, struct ib_mr *mr); #endif /* FRMR_POOLS_H */