git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
net/mlx5: LAG, introduce software vport LAG implementation
author: Shay Drory <shayd@nvidia.com>
Fri, 12 Jun 2026 11:39:00 +0000 (14:39 +0300)
committer: Jakub Kicinski <kuba@kernel.org>
Mon, 15 Jun 2026 18:40:50 +0000 (11:40 -0700)
SD LAG is a virtual LAG without hardware LAG support, so it cannot use
the firmware vport LAG commands. Implement a software-based vport LAG
using egress ACL bounce rules.

Add esw_set_slave_egress_rule() to create an egress ACL rule on the
slave's manager vport that bounces traffic to the master's manager
vport. This achieves the same traffic steering as hardware vport LAG.

Redirect mlx5_cmd_create_vport_lag() and mlx5_cmd_destroy_vport_lag()
to the software implementation when operating in SD LAG mode.
In addition, adjust lag_demux creation to check SD LAG mode as well.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20260612113904.537595-12-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
drivers/net/ethernet/mellanox/mlx5/core/lag/shared_fdb.c

index 94a530d19828b66de2bb026e51a34581c066eece..a5f0774834febc16f7cd5d583f99e8aab9f250ef 100644 (file)
@@ -950,6 +950,10 @@ void esw_vport_change_handle_locked(struct mlx5_vport *vport);
 
 bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller);
 
+/*
+ * Software vport LAG helpers. add_one() installs, and del_one() removes, the
+ * egress ACL bounce rule on @slave_esw's manager vport that forwards to
+ * @master_esw's manager vport. add_one() returns 0 or a negative errno.
+ */
+int mlx5_eswitch_offloads_vport_lag_add_one(struct mlx5_eswitch *master_esw,
+                                           struct mlx5_eswitch *slave_esw);
+void mlx5_eswitch_offloads_vport_lag_del_one(struct mlx5_eswitch *master_esw,
+                                            struct mlx5_eswitch *slave_esw);
 int mlx5_eswitch_offloads_single_fdb_add_one(struct mlx5_eswitch *master_esw,
                                             struct mlx5_eswitch *slave_esw, int max_slaves);
 void mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw,
index 915571a1586c7f07c87ef3c5579c438d24b5c37f..a24719cfba34f87f77558454e72f9cdd3b1c7d41 100644 (file)
@@ -3041,6 +3041,136 @@ static int __esw_set_master_egress_rule(struct mlx5_core_dev *master,
        return err;
 }
 
+/*
+ * Create the egress ACL resources used for software vport LAG on @vport: a
+ * flow table with max_fte = 1 and one flow group built from an all-zero
+ * create_flow_group_in. Nothing is done if the vport already has an egress
+ * ACL table.
+ *
+ * Return: 0 on success (or when the table already exists), -EINVAL if the
+ * vport egress namespace is not found, -ENOMEM on allocation failure, or the
+ * error reported by flow table / flow group creation.
+ */
+static int esw_slave_egress_create_resources(struct mlx5_eswitch *esw,
+                                            struct mlx5_vport *vport)
+{
+       struct mlx5_flow_table_attr attr = {
+               .max_fte = 1,
+               .prio = 0,
+               .level = 0,
+       };
+       struct mlx5_flow_namespace *egress_ns;
+       struct mlx5_flow_group *grp;
+       struct mlx5_flow_table *tbl;
+       u32 *fg_in;
+       int ret;
+
+       if (vport->egress.acl)
+               return 0;
+
+       xa_init_flags(&vport->egress.offloads.bounce_rules, XA_FLAGS_ALLOC);
+
+       egress_ns = mlx5_get_flow_vport_namespace(esw->dev,
+                                                 MLX5_FLOW_NAMESPACE_ESW_EGRESS,
+                                                 vport->index);
+       if (!egress_ns)
+               return -EINVAL;
+
+       fg_in = kvzalloc(MLX5_ST_SZ_BYTES(create_flow_group_in), GFP_KERNEL);
+       if (!fg_in)
+               return -ENOMEM;
+
+       /* Any vport other than 0, or running on the ECPF, needs OTHER_VPORT. */
+       if (vport->vport || mlx5_core_is_ecpf(esw->dev))
+               attr.flags = MLX5_FLOW_TABLE_OTHER_VPORT;
+
+       tbl = mlx5_create_vport_flow_table(egress_ns, &attr, vport->vport);
+       if (IS_ERR(tbl)) {
+               ret = PTR_ERR(tbl);
+               goto free_in;
+       }
+
+       grp = mlx5_create_flow_group(tbl, fg_in);
+       if (IS_ERR(grp)) {
+               ret = PTR_ERR(grp);
+               mlx5_destroy_flow_table(tbl);
+               vport->egress.acl = NULL;
+               goto free_in;
+       }
+
+       /* Publish the resources on the vport only once both exist. */
+       vport->egress.acl = tbl;
+       vport->egress.offloads.bounce_grp = grp;
+       vport->egress.type = VPORT_EGRESS_ACL_TYPE_SHARED_FDB;
+       ret = 0;
+
+free_in:
+       kvfree(fg_in);
+       return ret;
+}
+
+/*
+ * Release the slave egress resources of @vport: destroy the bounce flow group
+ * if one is present and, when an egress ACL table exists, run the offloads
+ * egress ACL cleanup and destroy the bounce-rules xarray.
+ */
+static void esw_slave_egress_destroy_resources(struct mlx5_vport *vport)
+{
+       struct mlx5_flow_group *grp = vport->egress.offloads.bounce_grp;
+
+       if (!IS_ERR_OR_NULL(grp)) {
+               mlx5_destroy_flow_group(grp);
+               vport->egress.offloads.bounce_grp = NULL;
+       }
+
+       if (IS_ERR_OR_NULL(vport->egress.acl))
+               return;
+
+       esw_acl_egress_ofld_cleanup(vport);
+       xa_destroy(&vport->egress.offloads.bounce_rules);
+}
+
+/*
+ * Install the software vport LAG bounce rule: a rule with no match spec in
+ * the egress ACL of @slave's manager vport, forwarding to @master's manager
+ * vport addressed by the master's vhca_id. The rule handle is stored in the
+ * slave vport's bounce_rules xarray, keyed by the master's vhca_id.
+ *
+ * Return: 0 on success or a negative errno. If adding the rule or inserting
+ * it into the xarray fails, the slave's egress resources are destroyed.
+ */
+static int esw_set_slave_egress_rule(struct mlx5_core_dev *master,
+                                    struct mlx5_core_dev *slave)
+{
+       struct mlx5_eswitch *esw = slave->priv.eswitch;
+       u16 key = MLX5_CAP_GEN(master, vhca_id);
+       struct mlx5_flow_destination fwd = {};
+       struct mlx5_flow_act act = {};
+       struct mlx5_flow_handle *rule;
+       struct mlx5_vport *mgr;
+       int ret;
+
+       mgr = mlx5_eswitch_get_vport(esw, esw->manager_vport);
+       if (IS_ERR(mgr))
+               return PTR_ERR(mgr);
+
+       ret = esw_slave_egress_create_resources(esw, mgr);
+       if (ret)
+               return ret;
+
+       act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+       fwd.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+       fwd.vport.num = master->priv.eswitch->manager_vport;
+       fwd.vport.vhca_id = key;
+       fwd.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
+
+       rule = mlx5_add_flow_rules(mgr->egress.acl, NULL, &act, &fwd, 1);
+       if (IS_ERR(rule)) {
+               ret = PTR_ERR(rule);
+               goto destroy_resources;
+       }
+
+       ret = xa_insert(&mgr->egress.offloads.bounce_rules, key, rule,
+                       GFP_KERNEL);
+       if (!ret)
+               return 0;
+
+       /* The handle could not be tracked: drop the rule before unwinding. */
+       mlx5_del_flow_rules(rule);
+destroy_resources:
+       esw_slave_egress_destroy_resources(mgr);
+       return ret;
+}
+
+/*
+ * Undo esw_set_slave_egress_rule(): destroy the bounce rule keyed by
+ * @master's vhca_id on @slave's manager vport, then release the slave's
+ * egress resources. Silently does nothing if the manager vport lookup fails.
+ */
+static void esw_unset_slave_egress_rule(struct mlx5_core_dev *master,
+                                       struct mlx5_core_dev *slave)
+{
+       struct mlx5_eswitch *esw = slave->priv.eswitch;
+       u16 key = MLX5_CAP_GEN(master, vhca_id);
+       struct mlx5_vport *mgr;
+
+       mgr = mlx5_eswitch_get_vport(esw, esw->manager_vport);
+       if (!IS_ERR(mgr)) {
+               esw_acl_egress_ofld_bounce_rule_destroy(mgr, key);
+               esw_slave_egress_destroy_resources(mgr);
+       }
+}
+
 static int esw_master_egress_create_resources(struct mlx5_eswitch *esw,
                                              struct mlx5_flow_namespace *egress_ns,
                                              struct mlx5_vport *vport, size_t count)
@@ -3208,6 +3338,18 @@ void mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw,
        esw_unset_master_egress_rule(master_esw->dev, slave_esw->dev);
 }
 
+/*
+ * Add @slave_esw to the software vport LAG led by @master_esw by installing
+ * the slave egress bounce rule between their core devices.
+ *
+ * Return: 0 on success or the negative errno from esw_set_slave_egress_rule().
+ */
+int mlx5_eswitch_offloads_vport_lag_add_one(struct mlx5_eswitch *master_esw,
+                                           struct mlx5_eswitch *slave_esw)
+{
+       struct mlx5_core_dev *master = master_esw->dev;
+       struct mlx5_core_dev *slave = slave_esw->dev;
+
+       return esw_set_slave_egress_rule(master, slave);
+}
+
+/*
+ * Remove @slave_esw from the software vport LAG led by @master_esw by tearing
+ * down the slave egress bounce rule between their core devices.
+ */
+void mlx5_eswitch_offloads_vport_lag_del_one(struct mlx5_eswitch *master_esw,
+                                            struct mlx5_eswitch *slave_esw)
+{
+       struct mlx5_core_dev *master = master_esw->dev;
+       struct mlx5_core_dev *slave = slave_esw->dev;
+
+       esw_unset_slave_egress_rule(master, slave);
+}
+
 #define ESW_OFFLOADS_DEVCOM_PAIR       (0)
 #define ESW_OFFLOADS_DEVCOM_UNPAIR     (1)
 
index 84eff995cad1e1e8f79f7567481b836e50ee5dca..06e1a61d1f58b9f056269a61f6d74ffb2bd76bd3 100644 (file)
@@ -139,9 +139,44 @@ static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
        return mlx5_cmd_exec_in(dev, modify_lag, in);
 }
 
+/*
+ * Look up the group id of the LAG function that owns @dev.
+ *
+ * Return: the function's group_id when its sd_fdb_active flag is set; 0 when
+ * the flag is clear, when @dev has no LAG device, or when no LAG function
+ * refers to @dev.
+ */
+static u32 mlx5_lag_dev_group_id(struct mlx5_core_dev *dev)
+{
+       struct mlx5_lag *ldev = mlx5_lag_dev(dev);
+       struct lag_func *func;
+       int idx;
+
+       if (!ldev)
+               return 0;
+
+       mlx5_lag_for_each(idx, 0, ldev, MLX5_LAG_FILTER_ALL) {
+               func = mlx5_lag_pf(ldev, idx);
+               if (func->dev != dev)
+                       continue;
+               if (!func->sd_fdb_active)
+                       return 0;
+               return func->group_id;
+       }
+
+       return 0;
+}
+
+/*
+ * Tell whether the vport LAG of @dev is implemented in software instead of
+ * through the firmware vport LAG commands. Currently this is exactly the SD
+ * LAG case, as reported by mlx5_lag_is_sd().
+ *
+ * Return: true for software LAG, false otherwise.
+ */
+static bool mlx5_lag_is_sw_lag(struct mlx5_core_dev *dev)
+{
+       return mlx5_lag_is_sd(dev);
+}
+
 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
 {
        u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};
+       struct mlx5_lag *ldev = mlx5_lag_dev(dev);
+       int ret;
+
+       if (mlx5_lag_is_sw_lag(dev)) {
+               if (!ldev)
+                       return -ENODEV;
+
+               mutex_lock(&ldev->lock);
+               ret = mlx5_lag_create_vport_lag(mlx5_lag_dev(dev),
+                                               mlx5_lag_dev_group_id(dev));
+               mutex_unlock(&ldev->lock);
+               return ret;
+       }
 
        MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
 
@@ -152,6 +187,18 @@ EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
 {
        u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};
+       struct mlx5_lag *ldev = mlx5_lag_dev(dev);
+
+       if (mlx5_lag_is_sw_lag(dev)) {
+               if (!ldev)
+                       return 0;
+
+               mutex_lock(&ldev->lock);
+               mlx5_lag_destroy_vport_lag(mlx5_lag_dev(dev),
+                                          mlx5_lag_dev_group_id(dev));
+               mutex_unlock(&ldev->lock);
+               return 0;
+       }
 
        MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
 
@@ -1663,7 +1710,7 @@ int mlx5_lag_demux_init(struct mlx5_core_dev *dev,
 
        xa_init(&pf->lag_demux_rules);
 
-       if (mlx5_get_sd(dev))
+       if (mlx5_lag_is_sw_lag(dev))
                return mlx5_lag_demux_ft_fg_init(dev, ft_attr, pf);
 
        return mlx5_lag_demux_fw_init(dev, ft_attr, pf);
index d645c2cfca4de1b7193934f91ade2ef703d80d4f..57e6f82713b0ab7edb927eb3142a6428289e1623 100644 (file)
@@ -175,6 +175,8 @@ int mlx5_lag_shared_fdb_create(struct mlx5_lag *ldev,
                               enum mlx5_lag_mode mode,
                               u32 group_id);
 void mlx5_lag_shared_fdb_destroy(struct mlx5_lag *ldev, u32 group_id);
+/*
+ * Software vport LAG over the LAG functions selected by @group_id; a
+ * @group_id of 0 falls back to the MLX5_LAG_FILTER_ALL filter.
+ */
+int mlx5_lag_create_vport_lag(struct mlx5_lag *ldev, u32 group_id);
+int mlx5_lag_destroy_vport_lag(struct mlx5_lag *ldev, u32 group_id);
 int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev);
 void mlx5_lag_destroy_single_fdb(struct mlx5_lag *ldev);
 bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev);
@@ -191,6 +193,18 @@ static inline int mlx5_lag_shared_fdb_create(struct mlx5_lag *ldev,
 static inline void mlx5_lag_shared_fdb_destroy(struct mlx5_lag *ldev,
                                               u32 group_id) {}
 
+/*
+ * Stub: software vport LAG creation is unavailable, always -EOPNOTSUPP.
+ * NOTE(review): presumably the variant used when the real implementation is
+ * compiled out; the guarding #if is outside this hunk, confirm.
+ */
+static inline int mlx5_lag_create_vport_lag(struct mlx5_lag *ldev,
+                                           u32 group_id)
+{
+       return -EOPNOTSUPP;
+}
+
+/*
+ * Stub: always -EOPNOTSUPP. Note that the real implementation in
+ * lag/shared_fdb.c returns 0 on every path, so callers must not rely on the
+ * return value being the same in both builds.
+ * NOTE(review): presumably the variant used when the real implementation is
+ * compiled out; the guarding #if is outside this hunk, confirm.
+ */
+static inline int mlx5_lag_destroy_vport_lag(struct mlx5_lag *ldev,
+                                            u32 group_id)
+{
+       return -EOPNOTSUPP;
+}
+
 static inline int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
 {
        return -EOPNOTSUPP;
index 1371e14c4c13121ba89f53252bd66f284df5b3c6..8d4f2903a1016c60081165cf3e6dbbae210cb35c 100644 (file)
@@ -89,6 +89,76 @@ err:
        return err;
 }
 
+/*
+ * Build a software vport LAG: every LAG function selected by @group_id (or by
+ * MLX5_LAG_FILTER_ALL when @group_id is 0), except the master, is added as a
+ * slave of the master's eswitch. The master is the function returned for
+ * MLX5_LAG_P1 under the same filter.
+ *
+ * Return: 0 on success, -EINVAL if no master is found, or the error from the
+ * first slave that failed; in that case the earlier slaves are removed again,
+ * walking back from the failing index.
+ */
+int mlx5_lag_create_vport_lag(struct mlx5_lag *ldev, u32 group_id)
+{
+       u32 filter = group_id ? group_id : MLX5_LAG_FILTER_ALL;
+       struct mlx5_eswitch *master_esw;
+       struct mlx5_eswitch *peer_esw;
+       int master_idx;
+       int idx, undo;
+       int ret;
+
+       master_idx = mlx5_lag_get_dev_index_by_seq_filter(ldev, MLX5_LAG_P1,
+                                                         filter);
+       if (master_idx < 0)
+               return -EINVAL;
+
+       master_esw = mlx5_lag_pf(ldev, master_idx)->dev->priv.eswitch;
+
+       mlx5_lag_for_each(idx, 0, ldev, filter) {
+               if (idx == master_idx)
+                       continue;
+
+               peer_esw = mlx5_lag_pf(ldev, idx)->dev->priv.eswitch;
+               ret = mlx5_eswitch_offloads_vport_lag_add_one(master_esw,
+                                                             peer_esw);
+               if (ret)
+                       goto unwind;
+       }
+
+       return 0;
+
+unwind:
+       mlx5_lag_for_each_reverse(undo, idx - 1, 0, ldev, filter) {
+               if (undo == master_idx)
+                       continue;
+
+               peer_esw = mlx5_lag_pf(ldev, undo)->dev->priv.eswitch;
+               mlx5_eswitch_offloads_vport_lag_del_one(master_esw, peer_esw);
+       }
+       return ret;
+}
+
+/*
+ * Tear down a software vport LAG: every LAG function selected by @group_id
+ * (or by MLX5_LAG_FILTER_ALL when @group_id is 0), except the master, is
+ * removed as a slave of the master's eswitch. Nothing is done when no master
+ * is found.
+ *
+ * Return: always 0.
+ */
+int mlx5_lag_destroy_vport_lag(struct mlx5_lag *ldev, u32 group_id)
+{
+       u32 filter = group_id ? group_id : MLX5_LAG_FILTER_ALL;
+       struct mlx5_eswitch *master_esw;
+       struct mlx5_eswitch *peer_esw;
+       int master_idx;
+       int idx;
+
+       master_idx = mlx5_lag_get_dev_index_by_seq_filter(ldev, MLX5_LAG_P1,
+                                                         filter);
+       if (master_idx < 0)
+               return 0;
+
+       master_esw = mlx5_lag_pf(ldev, master_idx)->dev->priv.eswitch;
+
+       mlx5_lag_for_each(idx, 0, ldev, filter) {
+               if (idx == master_idx)
+                       continue;
+
+               peer_esw = mlx5_lag_pf(ldev, idx)->dev->priv.eswitch;
+               mlx5_eswitch_offloads_vport_lag_del_one(master_esw, peer_esw);
+       }
+
+       return 0;
+}
+
 static void mlx5_lag_destroy_single_fdb_filter(struct mlx5_lag *ldev,
                                               u32 filter)
 {
@@ -141,7 +211,7 @@ int mlx5_lag_shared_fdb_create(struct mlx5_lag *ldev,
                               enum mlx5_lag_mode mode,
                               u32 group_id)
 {
-       u32 filter = group_id ? group_id : MLX5_LAG_FILTER_PORTS;
+       u32 filter = group_id ? group_id : MLX5_LAG_FILTER_ALL;
        int idx = mlx5_lag_get_dev_index_by_seq_filter(ldev, MLX5_LAG_P1,
                                                       filter);
        struct mlx5_core_dev *dev0;
@@ -209,7 +279,7 @@ err_add_devices:
 
 void mlx5_lag_shared_fdb_destroy(struct mlx5_lag *ldev, u32 group_id)
 {
-       u32 filter = group_id ? group_id : MLX5_LAG_FILTER_PORTS;
+       u32 filter = group_id ? group_id : MLX5_LAG_FILTER_ALL;
        struct lag_func *pf;
        int err;
        int i;