RDMA/mlx5: Change default device for LAG slaves in RDMA TRANSPORT namespaces
Author:     Patrisious Haddad <phaddad@nvidia.com>
AuthorDate: Wed, 29 Oct 2025 15:42:56 +0000 (17:42 +0200)
Commit:     Leon Romanovsky <leon@kernel.org>
CommitDate: Sun, 9 Nov 2025 10:21:33 +0000 (05:21 -0500)
In case of a LAG configuration, change the root namespace core device of
all LAG slaves to be the core device of the LAG master for RDMA_TRANSPORT
namespaces, in order to ensure that all tables are created through the
master device.
Once the LAG is disabled, revert back to the native core device.
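
Conceptually this is a pair of root-device switches per LAG peer, done
through the mlx5_fs_set_root_dev() helper (presumably introduced earlier
in this series). A minimal sketch for a single peer, with the capability
checks and error handling omitted; peer and master are illustrative
names for the slave's and master's mlx5_core_dev pointers:

    /* LAG enabled: hand the peer's RDMA_TRANSPORT roots to the master. */
    mlx5_fs_set_root_dev(peer, master, FS_FT_RDMA_TRANSPORT_TX);
    mlx5_fs_set_root_dev(peer, master, FS_FT_RDMA_TRANSPORT_RX);

    /* LAG disabled: revert the roots to the peer's native core device. */
    mlx5_fs_set_root_dev(peer, peer, FS_FT_RDMA_TRANSPORT_TX);
    mlx5_fs_set_root_dev(peer, peer, FS_FT_RDMA_TRANSPORT_RX);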

Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
Signed-off-by: Edward Srouji <edwards@nvidia.com>
Link: https://patch.msgid.link/20251029-support-other-eswitch-v1-4-98bb707b5d57@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
drivers/infiniband/hw/mlx5/ib_rep.c

index cc8859d3c2f539fca9ab70d02a7383ffe662dd6f..bbecca40517184b3627fe5cb8cc7d2d66726445e 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -44,6 +44,63 @@ static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
        }
 }
 
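+/*
+ * Move ownership of cur_owner's RDMA_TRANSPORT flow table roots over to
+ * new_owner. Silently skipped when cur_owner lacks RDMA_TRANSPORT table
+ * support or new_owner cannot manage RDMA transport for another eswitch.
+ * If switching RX fails after TX succeeded, roll TX back so both
+ * directions keep pointing at the same device.
+ */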
+static int mlx5_ib_set_owner_transport(struct mlx5_core_dev *cur_owner,
+                                       struct mlx5_core_dev *new_owner)
+{
+       int ret;
+
+       if (!MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(cur_owner, ft_support) ||
+           !MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(cur_owner, ft_support))
+               return 0;
+
+       if (!MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager) ||
+           !MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager_other_eswitch))
+               return 0;
+
+       ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
+                                  FS_FT_RDMA_TRANSPORT_TX);
+       if (ret)
+               return ret;
+
+       ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
+                                  FS_FT_RDMA_TRANSPORT_RX);
+       if (ret) {
+               mlx5_fs_set_root_dev(cur_owner, cur_owner,
+                                    FS_FT_RDMA_TRANSPORT_TX);
+               return ret;
+       }
+
+       return 0;
+}
+
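+/* Revert every LAG peer's RDMA_TRANSPORT roots to its native core device. */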
+static void mlx5_ib_release_transport(struct mlx5_core_dev *dev)
+{
+       struct mlx5_core_dev *peer_dev;
+       int i, ret;
+
+       mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+               ret = mlx5_ib_set_owner_transport(peer_dev, peer_dev);
+               WARN_ON_ONCE(ret);
+       }
+}
+
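+/*
+ * Point every LAG peer's RDMA_TRANSPORT roots at @dev (the LAG master).
+ * On any failure, revert all peers back to their native core devices.
+ */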
+static int mlx5_ib_take_transport(struct mlx5_core_dev *dev)
+{
+       struct mlx5_core_dev *peer_dev;
+       int ret;
+       int i;
+
+       mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+               ret = mlx5_ib_set_owner_transport(peer_dev, dev);
+               if (ret) {
+                       mlx5_ib_release_transport(dev);
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
 static int
 mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 {
@@ -88,10 +145,18 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
        else
                return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);
 
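+       /* With a shared FDB, the LAG master must own the peers'
+        * RDMA_TRANSPORT roots before the IB device is created.
+        */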
+       if (mlx5_lag_is_shared_fdb(dev)) {
+               ret = mlx5_ib_take_transport(lag_master);
+               if (ret)
+                       return ret;
+       }
+
        ibdev = ib_alloc_device_with_net(mlx5_ib_dev, ib_dev,
                                         mlx5_core_net(lag_master));
-       if (!ibdev)
-               return -ENOMEM;
+       if (!ibdev) {
+               ret = -ENOMEM;
+               goto release_transport;
+       }
 
        ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port),
                              GFP_KERNEL);
@@ -127,6 +192,10 @@ fail_add:
        kfree(ibdev->port);
 fail_port:
        ib_dealloc_device(&ibdev->ib_dev);
+release_transport:
+       if (mlx5_lag_is_shared_fdb(lag_master))
+               mlx5_ib_release_transport(lag_master);
+
        return ret;
 }
 
@@ -182,6 +251,7 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
                                esw = peer_mdev->priv.eswitch;
                                mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
                        }
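+                       /* LAG teardown: hand the RDMA_TRANSPORT roots
+                        * back to the native core devices.
+                        */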
+                       mlx5_ib_release_transport(mdev);
                }
                __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
        }