git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
net/mlx5: SD, support switchdev mode transition with shared FDB
author: Shay Drory <shayd@nvidia.com>
Fri, 12 Jun 2026 11:38:56 +0000 (14:38 +0300)
committer: Jakub Kicinski <kuba@kernel.org>
Mon, 15 Jun 2026 18:40:50 +0000 (11:40 -0700)
When the eswitch transitions, propagate the change to SD: secondaries
get their TX flow table root reconfigured for the new mode, and when
all group devices move to switchdev, the per-group shared FDB is
activated.

Shared FDB activation is best-effort - failure does not block the
eswitch transition; the next transition retries.

Note: the existing mlx5_get_sd() guard that blocks switchdev for SD
devices is intentionally retained. It will be removed once all
supporting patches are in place.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20260612113904.537595-8-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h

index 366531d8ef023c28e9425d818a37055743029602..915571a1586c7f07c87ef3c5579c438d24b5c37f 100644 (file)
@@ -46,6 +46,7 @@
 #include "fs_core.h"
 #include "lib/mlx5.h"
 #include "lib/devcom.h"
+#include "lib/sd.h"
 #include "lib/eq.h"
 #include "lib/fs_chains.h"
 #include "en_tc.h"
@@ -3164,6 +3165,9 @@ static void esw_unset_master_egress_rule(struct mlx5_core_dev *dev,
        vport = mlx5_eswitch_get_vport(dev->priv.eswitch,
                                       dev->priv.eswitch->manager_vport);
 
+       if (!vport->egress.acl)
+               return;
+
        esw_acl_egress_ofld_bounce_rule_destroy(vport, MLX5_CAP_GEN(slave_dev, vhca_id));
 
        if (xa_empty(&vport->egress.offloads.bounce_rules)) {
@@ -3182,6 +3186,9 @@ int mlx5_eswitch_offloads_single_fdb_add_one(struct mlx5_eswitch *master_esw,
        if (err)
                return err;
 
+       if (!mlx5_sd_is_primary(slave_esw->dev))
+               return 0;
+
        err = esw_set_master_egress_rule(master_esw->dev,
                                         slave_esw->dev, max_slaves);
        if (err)
@@ -3401,7 +3408,7 @@ void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw,
                return;
 
        if ((MLX5_VPORT_MANAGER(esw->dev) || mlx5_core_is_ecpf_esw_manager(esw->dev)) &&
-           !mlx5_lag_is_supported(esw->dev))
+           (!mlx5_lag_is_supported(esw->dev) && !mlx5_get_sd(esw->dev)))
                return;
 
        xa_init(&esw->paired);
@@ -4306,6 +4313,9 @@ unlock:
        mlx5_esw_unlock(esw);
 enable_lag:
        mlx5_lag_enable_change(esw->dev);
+       /* Shared FDB activation creates a LAG, which changes the reps. */
+       if (!err)
+               mlx5_sd_eswitch_mode_set(esw->dev, mlx5_mode);
        return err;
 }
 
index c670ed1dd63cfbdaa6404556721359f407e0eb9d..b35795bac098322c27b3b2db68da7ca552f3fd66 100644 (file)
@@ -5,6 +5,8 @@
 #include "../lag/lag.h"
 #include "mlx5_core.h"
 #include "lib/mlx5.h"
+#include "devlink.h"
+#include "eswitch.h"
 #include "fs_cmd.h"
 #include <linux/mlx5/eswitch.h>
 #include <linux/mlx5/vport.h>
@@ -33,6 +35,8 @@ struct mlx5_sd {
                struct { /* secondary */
                        struct mlx5_core_dev *primary_dev;
                        u32 alias_obj_id;
+                       /* TX flow table root in switchdev (silent) config */
+                       bool tx_root_silent;
                };
        };
 };
@@ -672,6 +676,29 @@ static void sd_secondary_destroy_alias_ft(struct mlx5_core_dev *secondary)
                                   MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS);
 }
 
+static int mlx5_sd_secondary_conf_tx_root(struct mlx5_core_dev *secondary,
+                                         bool disconnect)
+{
+       struct mlx5_sd *sd = mlx5_get_sd(secondary);
+       int err;
+
+       /* Idempotent: skip if TX root is already in the requested state. */
+       if (sd->tx_root_silent == disconnect)
+               return 0;
+
+       if (disconnect)
+               err = mlx5_fs_cmd_set_tx_flow_table_root(secondary, 0, true);
+       else
+               err = mlx5_fs_cmd_set_tx_flow_table_root(secondary,
+                                                        sd->alias_obj_id,
+                                                        false);
+       if (err)
+               return err; /* cached state is left unchanged */
+
+       sd->tx_root_silent = disconnect; /* record only on success */
+       return 0;
+}
+
 static int sd_cmd_set_secondary(struct mlx5_core_dev *secondary,
                                struct mlx5_core_dev *primary,
                                u8 *alias_key)
@@ -691,9 +718,11 @@ static int sd_cmd_set_secondary(struct mlx5_core_dev *secondary,
        if (err)
                goto err_unset_silent;
 
-       err = mlx5_fs_cmd_set_tx_flow_table_root(secondary, sd->alias_obj_id, false);
+       err = mlx5_fs_cmd_set_tx_flow_table_root(secondary, sd->alias_obj_id,
+                                                false);
        if (err)
                goto err_destroy_alias_ft;
+       sd->tx_root_silent = false;
 
        return 0;
 
@@ -710,7 +739,7 @@ static void sd_cmd_unset_secondary(struct mlx5_core_dev *secondary)
        struct mlx5_sd *primary_sd;
 
        primary_sd = mlx5_get_sd(mlx5_sd_get_primary(secondary));
-       mlx5_fs_cmd_set_tx_flow_table_root(secondary, 0, true);
+       mlx5_sd_secondary_conf_tx_root(secondary, true);
        sd_secondary_destroy_alias_ft(secondary);
        if (!primary_sd->fw_silents_secondaries)
                mlx5_fs_cmd_set_l2table_entry_silent(secondary, 0);
@@ -939,6 +968,111 @@ struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev,
        return &primary_adev->adev;
 }
 
+#ifdef CONFIG_MLX5_ESWITCH
+/* All SD members must have completed esw_offloads_enable (i.e., reached
+ * mlx5_esw_offloads_devcom_init) and become eswitch-peers of the primary.
+ * Until then, mlx5_eswitch_is_peer() returns false for the not-yet-paired
+ * member and shared_fdb_supported_filter would reject. When all PFs transition
+ * in parallel, only the last one to finish satisfies this gate; for the
+ * earlier ones this returns false and the caller silently skips activation.
+ */
+static bool mlx5_sd_all_paired(struct mlx5_core_dev *primary)
+{
+       struct mlx5_eswitch *primary_esw = primary->priv.eswitch;
+       struct mlx5_core_dev *pos;
+       int i;
+
+       mlx5_sd_for_each_secondary(i, primary, pos) {
+               if (!mlx5_eswitch_is_peer(primary_esw, pos->priv.eswitch))
+                       return false;
+       }
+       return true;
+}
+
+static void mlx5_sd_activate_shared_fdb(struct mlx5_core_dev *primary)
+{
+       struct mlx5_sd *sd = mlx5_get_sd(primary);
+       struct mlx5_lag *ldev;
+       struct lag_func *pf;
+       int err;
+       int i;
+
+       ldev = mlx5_lag_dev(primary);
+       if (!ldev) {
+               sd_warn(primary, "Shared FDB MUST have ldev\n");
+               return;
+       }
+
+       mutex_lock(&ldev->lock);
+
+       if (ldev->mode_changes_in_progress)
+               goto unlock;
+
+       if (!mlx5_sd_all_paired(primary))
+               goto unlock; /* not all paired yet; no warning */
+
+       /* Already active? Only the first iterated PF is checked (see break). */
+       mlx5_lag_for_each(i, 0, ldev, sd->group_id) {
+               pf = mlx5_lag_pf(ldev, i);
+               if (pf->sd_fdb_active)
+                       goto unlock;
+               break;
+       }
+
+       if (!mlx5_lag_shared_fdb_supported_filter(ldev, sd->group_id)) {
+               sd_warn(primary, "Shared FDB not supported\n");
+               goto unlock;
+       }
+
+       err = mlx5_lag_shared_fdb_create(ldev, NULL, 0, sd->group_id);
+       if (err)
+               sd_warn(primary, "Failed to create shared FDB: %d\n", err);
+       else
+               sd_info(primary, "Shared FDB created\n");
+
+unlock:
+       mutex_unlock(&ldev->lock);
+}
+
+void mlx5_sd_eswitch_mode_set(struct mlx5_core_dev *dev, u16 mlx5_mode)
+{
+       struct mlx5_core_dev *primary;
+       struct mlx5_sd *sd;
+       int err;
+
+       sd = mlx5_get_sd(dev);
+       if (!sd || !mlx5_devcom_comp_is_ready(sd->devcom))
+               return;
+
+       mlx5_devcom_comp_lock(sd->devcom);
+       if (!mlx5_devcom_comp_is_ready(sd->devcom)) /* re-check under lock */
+               goto unlock;
+
+       primary = mlx5_sd_get_primary(dev);
+
+       /* Secondary devices need TX root reconfiguration */
+       if (dev != primary) {
+               bool disconnect = (mlx5_mode == MLX5_ESWITCH_OFFLOADS);
+
+               err = mlx5_sd_secondary_conf_tx_root(dev, disconnect);
+               if (err) {
+                       sd_warn(dev, "Failed to set TX root: %d\n", err);
+                       goto unlock; /* also skips FDB activation */
+               }
+       }
+
+       /* Try to activate shared FDB when all devices are in switchdev.
+        * Shared FDB is optional - failure here doesn't fail the transition.
+        */
+       if (mlx5_mode == MLX5_ESWITCH_OFFLOADS)
+               mlx5_sd_activate_shared_fdb(primary);
+
+unlock:
+       mlx5_devcom_comp_unlock(sd->devcom);
+}
+
+#endif /* CONFIG_MLX5_ESWITCH */
+
 void mlx5_sd_put_adev(struct auxiliary_device *actual_adev,
                      struct auxiliary_device *adev)
 {
index 7a41adbcee71b84ae43c4509f017d05388ee5781..cb88bf34079adb9eaa5519e61115469c502f3907 100644 (file)
@@ -45,6 +45,13 @@ mlx5_sd_get_devcom(struct mlx5_core_dev *dev)
 }
 #endif
 
+#ifdef CONFIG_MLX5_ESWITCH
+void mlx5_sd_eswitch_mode_set(struct mlx5_core_dev *dev, u16 mlx5_mode);
+#else /* !CONFIG_MLX5_ESWITCH */
+static inline void
+mlx5_sd_eswitch_mode_set(struct mlx5_core_dev *dev, u16 mlx5_mode) { return; }
+#endif /* CONFIG_MLX5_ESWITCH */
+
 #define mlx5_sd_for_each_dev_from_to(i, primary, ix_from, to, pos)     \
        for (i = ix_from;                                                       \
             (pos = mlx5_sd_primary_get_peer(primary, i)) && pos != (to); i++)