From: Shay Drory Date: Sun, 31 May 2026 11:39:52 +0000 (+0300) Subject: net/mlx5e: TC, enable steering for SD LAG X-Git-Url: http://git.ipfire.org/gitweb/?a=commitdiff_plain;h=9f062b931daa63c4b53baa20d5487d5f7f8cee04;p=thirdparty%2Fkernel%2Flinux.git net/mlx5e: TC, enable steering for SD LAG Enable TC flow steering for SD LAG mode by extending multiport eligibility checks and peer flow handling. SD LAG operates similarly to MPESW for TC offloads - flows on secondary devices need peer flow creation on the primary, and multiport forwarding rules are eligible when either MPESW or SD LAG is active. Add mlx5_lag_is_sd() helper to query SD LAG mode, and mlx5_sd_is_primary() to identify the primary device. Redirect uplink priv/proto_dev queries to the primary device's eswitch in SD configurations. Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260531113954.395443-13-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h index a0434ceebe69..28cab4bf525c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h @@ -104,6 +104,10 @@ struct mlx5e_tc_flow { * due to missing route) */ struct list_head peer_flows; /* flows on peer */ + int peer_index; /* peer-flow index pinned at add time, used at del + * time so removal is independent of LAG state + * changes between add and del. + */ struct net_device *orig_dev; /* netdev adding flow first */ int tmp_entry_index; struct list_head tmp_list; /* temporary flow list used by neigh update */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 2a16368a948e..910492eb51f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -71,6 +71,7 @@ #include #include "lag/lag.h" #include "lag/mp.h" +#include "lib/sd.h" #define MLX5E_TC_TABLE_NUM_GROUPS 4 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18) @@ -2132,7 +2133,7 @@ static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow, mutex_unlock(&esw->offloads.peer_mutex); list_for_each_entry_safe(peer_flow, tmp, &flow->peer_flows, peer_flows) { - if (peer_index != mlx5_lag_get_dev_seq(peer_flow->priv->mdev)) + if (peer_index != peer_flow->peer_index) continue; list_del(&peer_flow->peer_flows); @@ -4196,9 +4197,26 @@ static bool is_lag_dev(struct mlx5e_priv *priv, same_hw_reps(priv, peer_netdev)); } +static bool is_sd_eligible(struct mlx5e_priv *priv, + struct net_device *peer_netdev) +{ + struct mlx5e_priv *peer_priv; + + peer_priv = netdev_priv(peer_netdev); + return same_hw_reps(priv, peer_netdev) && + mlx5_lag_is_sd(priv->mdev) && + (mlx5_sd_get_primary(priv->mdev) == + mlx5_sd_get_primary(peer_priv->mdev)); +} + static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev) { - return same_hw_reps(priv, out_dev) && mlx5_lag_is_mpesw(priv->mdev); + struct mlx5_core_dev *primary = mlx5_sd_get_primary(priv->mdev); + + if (!primary) + return false; + + return same_hw_reps(priv, out_dev) && mlx5_lag_is_mpesw(primary); } bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, @@ -4207,6 +4225,9 @@ bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, if (is_merged_eswitch_vfs(priv, out_dev)) return true; + if (is_sd_eligible(priv, out_dev)) + return true; + if (is_multiport_eligible(priv, out_dev)) return true; @@ -4351,7 +4372,7 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, return &tc->ht; } -static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) +static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow, bool *is_sd) { struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr; struct mlx5_flow_attr *attr = flow->attr; @@ -4372,6 +4393,13 @@ static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) if (mlx5_lag_is_mpesw(esw_attr->in_mdev)) return true; + if (mlx5_lag_is_sd(esw_attr->in_mdev) && + !mlx5_sd_is_primary(esw_attr->in_mdev)) { + if (!mlx5_lag_is_mpesw(mlx5_sd_get_primary(esw_attr->in_mdev))) + *is_sd = true; + return true; + } + return false; } @@ -4609,6 +4637,7 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, goto out; } + peer_flow->peer_index = i; list_add_tail(&peer_flow->peer_flows, &flow->peer_flows); flow_flag_set(flow, DUP); mutex_lock(&esw->offloads.peer_mutex); @@ -4628,19 +4657,26 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow **__flow) { struct mlx5_devcom_comp_dev *devcom = priv->mdev->priv.eswitch->devcom, *pos; + struct netlink_ext_ack *extack = f->common.extack; struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *in_rep = rpriv->rep; struct mlx5_core_dev *in_mdev = priv->mdev; struct mlx5_eswitch *peer_esw; struct mlx5e_tc_flow *flow; + bool is_sd = false; int err; + if (mlx5_lag_is_sd(in_mdev) && !mlx5_lag_is_active(in_mdev)) { + NL_SET_ERR_MSG_MOD(extack, "SD shared FDB not yet active"); + return -EOPNOTSUPP; + } + flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep, in_mdev); if (IS_ERR(flow)) return PTR_ERR(flow); - if (!is_peer_flow_needed(flow)) { + if (!is_peer_flow_needed(flow, &is_sd)) { *__flow = flow; return 0; } @@ -4651,6 +4687,15 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv, } mlx5_devcom_for_each_peer_entry(devcom, peer_esw, pos) { + if (is_sd) { + /* SD shared FDB: only the matching SD primary. */ + if (mlx5_sd_get_primary(in_mdev) != + mlx5_sd_get_primary(peer_esw->dev)) + continue; + } else { + if (!mlx5_sd_is_primary(peer_esw->dev)) + continue; + } err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags, peer_esw); if (err) goto peer_clean; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d65f30bb2f80..830fc910a080 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -4690,8 +4690,11 @@ EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps_nested); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) { + struct mlx5_core_dev *primary = mlx5_sd_get_primary(esw->dev); struct mlx5_eswitch_rep *rep; + if (primary) + esw = primary->priv.eswitch; rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); return rep->rep_data[rep_type].priv; } @@ -4713,6 +4716,11 @@ EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev); void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type) { + struct mlx5_core_dev *primary = mlx5_sd_get_primary(esw->dev); + + if (primary) + esw = primary->priv.eswitch; + return mlx5_eswitch_get_proto_dev(esw, MLX5_VPORT_UPLINK, rep_type); } EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index a2c7e2927431..dd3f18f85466 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -2425,6 +2425,20 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_lag_is_sriov); +bool mlx5_lag_is_sd(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + unsigned long flags; + bool res; + + spin_lock_irqsave(&lag_lock, flags); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_sd(ldev, dev); + spin_unlock_irqrestore(&lag_lock, flags); + + return res; +} + bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index 82b82bdb16dd..0296f752bb4c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -202,6 +202,7 @@ static inline bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev) } #endif bool mlx5_lag_check_prereq(struct mlx5_lag *ldev); +bool mlx5_lag_is_sd(struct mlx5_core_dev *dev); int mlx5_lag_demux_init(struct mlx5_core_dev *dev, struct mlx5_flow_table_attr *ft_attr); void mlx5_lag_demux_cleanup(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c index ec606851feb8..25286ecd724e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c @@ -49,13 +49,16 @@ static int mlx5_sd_get_host_buses(struct mlx5_core_dev *dev) return sd->host_buses; } -static struct mlx5_core_dev *mlx5_sd_get_primary(struct mlx5_core_dev *dev) +struct mlx5_core_dev *mlx5_sd_get_primary(struct mlx5_core_dev *dev) { struct mlx5_sd *sd = mlx5_get_sd(dev); if (!sd) return dev; + if (!mlx5_devcom_comp_is_ready(sd->devcom)) + return NULL; + return sd->primary ? dev : sd->primary_dev; } @@ -69,6 +72,16 @@ struct mlx5_devcom_comp_dev *mlx5_sd_get_devcom(struct mlx5_core_dev *dev) return sd->devcom; } +bool mlx5_sd_is_primary(struct mlx5_core_dev *dev) +{ + struct mlx5_sd *sd = mlx5_get_sd(dev); + + if (!sd) + return true; + + return sd->primary; +} + struct mlx5_core_dev * mlx5_sd_primary_get_peer(struct mlx5_core_dev *primary, int idx) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h index bf59903ab23f..011702ff6f02 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.h @@ -10,6 +10,8 @@ struct mlx5_sd; +struct mlx5_core_dev *mlx5_sd_get_primary(struct mlx5_core_dev *dev); +bool mlx5_sd_is_primary(struct mlx5_core_dev *dev); struct mlx5_core_dev *mlx5_sd_primary_get_peer(struct mlx5_core_dev *primary, int idx); int mlx5_sd_ch_ix_get_dev_ix(struct mlx5_core_dev *dev, int ch_ix); int mlx5_sd_ch_ix_get_vec_ix(struct mlx5_core_dev *dev, int ch_ix);