From: Patrisious Haddad
Date: Wed, 29 Oct 2025 15:42:56 +0000 (+0200)
Subject: RDMA/mlx5: Change default device for LAG slaves in RDMA TRANSPORT namespaces
X-Git-Tag: v6.19-rc1~130^2~47
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3506242da07156e6804c061554bd01d77c1b463b;p=thirdparty%2Fkernel%2Flinux.git

RDMA/mlx5: Change default device for LAG slaves in RDMA TRANSPORT namespaces

When a LAG is configured, change the root namespace core device of the
RDMA_TRANSPORT namespaces for all of the LAG slaves to the core device
of the master device, in order to ensure that all tables are created
through the master device. Once the LAG is disabled, revert to the
native core device.

Signed-off-by: Patrisious Haddad
Signed-off-by: Edward Srouji
Link: https://patch.msgid.link/20251029-support-other-eswitch-v1-4-98bb707b5d57@nvidia.com
Signed-off-by: Leon Romanovsky
---

diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index cc8859d3c2f53..bbecca4051718 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -44,6 +44,63 @@ static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
 	}
 }
 
+static int mlx5_ib_set_owner_transport(struct mlx5_core_dev *cur_owner,
+				       struct mlx5_core_dev *new_owner)
+{
+	int ret;
+
+	if (!MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(cur_owner, ft_support) ||
+	    !MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(cur_owner, ft_support))
+		return 0;
+
+	if (!MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager) ||
+	    !MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager_other_eswitch))
+		return 0;
+
+	ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
+				   FS_FT_RDMA_TRANSPORT_TX);
+	if (ret)
+		return ret;
+
+	ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
+				   FS_FT_RDMA_TRANSPORT_RX);
+	if (ret) {
+		mlx5_fs_set_root_dev(cur_owner, cur_owner,
+				     FS_FT_RDMA_TRANSPORT_TX);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void mlx5_ib_release_transport(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_dev *peer_dev;
+	int i, ret;
+
+	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+		ret = mlx5_ib_set_owner_transport(peer_dev, peer_dev);
+		WARN_ON_ONCE(ret);
+	}
+}
+
+static int mlx5_ib_take_transport(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_dev *peer_dev;
+	int ret;
+	int i;
+
+	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+		ret = mlx5_ib_set_owner_transport(peer_dev, dev);
+		if (ret) {
+			mlx5_ib_release_transport(dev);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
 static int
 mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 {
@@ -88,10 +145,18 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 	else
 		return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);
 
+	if (mlx5_lag_is_shared_fdb(dev)) {
+		ret = mlx5_ib_take_transport(lag_master);
+		if (ret)
+			return ret;
+	}
+
 	ibdev = ib_alloc_device_with_net(mlx5_ib_dev, ib_dev,
 					 mlx5_core_net(lag_master));
-	if (!ibdev)
-		return -ENOMEM;
+	if (!ibdev) {
+		ret = -ENOMEM;
+		goto release_transport;
+	}
 
 	ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port),
 			      GFP_KERNEL);
@@ -127,6 +192,10 @@ fail_add:
 	kfree(ibdev->port);
 fail_port:
 	ib_dealloc_device(&ibdev->ib_dev);
+release_transport:
+	if (mlx5_lag_is_shared_fdb(lag_master))
+		mlx5_ib_release_transport(lag_master);
+
 	return ret;
 }
 
@@ -182,6 +251,7 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
 			esw = peer_mdev->priv.eswitch;
 			mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
 		}
+		mlx5_ib_release_transport(mdev);
 	}
 
 	__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
 }
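
For illustration, the ownership-transfer pattern that mlx5_ib_take_transport()
and mlx5_ib_release_transport() implement (hand every LAG peer over to the
master, roll back on partial failure, revert unconditionally on teardown) can
be sketched standalone. This is a minimal userspace model, not kernel code:
struct dev, set_owner(), NPEERS and the plain for loop are hypothetical
stand-ins for mlx5_core_dev, mlx5_fs_set_root_dev() and
mlx5_lag_for_each_peer_mdev(), and the TX/RX two-table rollback inside
mlx5_ib_set_owner_transport() is collapsed into a single set_owner() call.

#include <stdio.h>

#define NPEERS 2

struct dev {
	const char *name;
	const struct dev *owner;	/* device whose root namespace creates our tables */
	int fail_take;			/* simulate a mid-loop takeover failure */
};

/* Stand-in for mlx5_fs_set_root_dev(): retarget cur's root namespace. */
static int set_owner(struct dev *cur, const struct dev *new_owner)
{
	if (cur->fail_take && new_owner != cur)
		return -1;
	cur->owner = new_owner;
	return 0;
}

/* Mirrors mlx5_ib_release_transport(): every peer owns itself again. */
static void release_transport(struct dev peers[])
{
	for (int i = 0; i < NPEERS; i++)
		set_owner(&peers[i], &peers[i]);
}

/* Mirrors mlx5_ib_take_transport(): the master owns all peers, or none. */
static int take_transport(struct dev peers[], const struct dev *master)
{
	for (int i = 0; i < NPEERS; i++) {
		if (set_owner(&peers[i], master)) {
			release_transport(peers);	/* undo the partial takeover */
			return -1;
		}
	}
	return 0;
}

int main(void)
{
	struct dev peers[NPEERS] = {
		{ .name = "slave0", .fail_take = 0 },
		{ .name = "slave1", .fail_take = 1 },	/* set to 0 to see a clean takeover */
	};

	for (int i = 0; i < NPEERS; i++)
		peers[i].owner = &peers[i];		/* native ownership, as before LAG */

	if (take_transport(peers, &peers[0]))
		printf("takeover failed, peers rolled back to native ownership\n");

	for (int i = 0; i < NPEERS; i++)
		printf("%s: tables created through %s\n",
		       peers[i].name, peers[i].owner->name);
	return 0;
}

With fail_take set on slave1, take_transport() fails and both peers end up
owning themselves again, which corresponds to the error path in
mlx5_ib_vport_rep_load() that jumps to the release_transport label above.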