From: Shay Drory Date: Fri, 12 Jun 2026 11:39:03 +0000 (+0300) Subject: net/mlx5: SD, defer vport metadata init until SD is ready X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e3a02f3ecb13b3482315afb171df85651f0f8734;p=thirdparty%2Fkernel%2Flinux.git net/mlx5: SD, defer vport metadata init until SD is ready Allow SD devices to transition to switchdev before the SD group is fully up. Metadata allocation requires the SD group to be ready, so defer it from esw_offloads_enable() until SD shared-FDB activation. Add mlx5_esw_offloads_init_deferred_metadata() which allocates per-vport metadata and refreshes the ingress ACLs that were previously programmed with metadata=0. The helper is idempotent and can be called multiple times. Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20260612113904.537595-15-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index b2b3150f1f041..fea72b1dedab9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -440,6 +440,7 @@ struct mlx5_eswitch { void esw_offloads_disable(struct mlx5_eswitch *esw); int esw_offloads_enable(struct mlx5_eswitch *esw); +int mlx5_esw_offloads_init_deferred_metadata(struct mlx5_eswitch *esw); void esw_offloads_cleanup(struct mlx5_eswitch *esw); int esw_offloads_init(struct mlx5_eswitch *esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 4dc190a4e7b28..8fa7e633451cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3675,6 +3675,7 @@ static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw, WARN_ON(vport->metadata != vport->default_metadata); mlx5_esw_match_metadata_free(esw, vport->default_metadata); + vport->default_metadata = 0; } static void esw_offloads_metadata_uninit(struct mlx5_eswitch *esw) @@ -3711,6 +3712,73 @@ metadata_err: return err; } +/* Deferred metadata init for SD devices: allocate vport metadata and + * refresh the ingress ACL for every vport whose ACL was created with + * metadata=0 in esw_create_offloads_acl_tables() / esw_vport_setup(). + * + * No Rep is loaded at this point ==> no Rep net-dev exists, so no need + * to take rtnl lock. + * + * Safe to call multiple times - subsequent calls are no-ops. + */ +int mlx5_esw_offloads_init_deferred_metadata(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *manager, *vport; + unsigned long i; + int err; + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) + return 0; + + manager = mlx5_eswitch_get_vport(esw, esw->manager_vport); + if (IS_ERR(manager)) + return PTR_ERR(manager); + + /* Sanity check: skip if metadata was already initialized */ + if (manager->default_metadata) + return 0; + + err = esw_offloads_metadata_init(esw); + if (err) + return err; + + mutex_lock(&esw->state_lock); + /* Manager vport doesn't have a rep/netdev loaded but its ingress ACL + * was programmed with metadata=0 - refresh it explicitly. + */ + err = mlx5_esw_acl_ingress_vport_metadata_update(esw, + esw->manager_vport, + 0); + if (err) + goto err_acl; + + /* UPLINK is never marked enabled but its ACL is programmed in + * esw_create_offloads_acl_tables(); refresh it explicitly. + */ + err = mlx5_esw_acl_ingress_vport_metadata_update(esw, MLX5_VPORT_UPLINK, + 0); + if (err) + goto err_acl; + + mlx5_esw_for_each_vport(esw, i, vport) { + if (!vport || !vport->enabled) + continue; + err = mlx5_esw_acl_ingress_vport_metadata_update(esw, + vport->vport, + 0); + if (err) + goto err_acl; + } + + mutex_unlock(&esw->state_lock); + return 0; + +err_acl: + esw_offloads_metadata_uninit(esw); + mutex_unlock(&esw->state_lock); + return err; +} + int esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport) @@ -4072,9 +4140,14 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) if (err) goto err_roce; - err = esw_offloads_metadata_init(esw); - if (err) - goto err_metadata; + /* SD devices defer metadata init until SD is ready and + * mlx5_sd_pf_num_get() can return the correct pf_num. + */ + if (!mlx5_get_sd(esw->dev)) { + err = esw_offloads_metadata_init(esw); + if (err) + goto err_metadata; + } err = esw_set_passing_vport_metadata(esw, true); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c index b35795bac0983..2fcccd329eb5d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c @@ -992,6 +992,7 @@ static bool mlx5_sd_all_paired(struct mlx5_core_dev *primary) static void mlx5_sd_activate_shared_fdb(struct mlx5_core_dev *primary) { struct mlx5_sd *sd = mlx5_get_sd(primary); + struct mlx5_core_dev *pos; struct mlx5_lag *ldev; struct lag_func *pf; int err; @@ -1024,6 +1025,21 @@ static void mlx5_sd_activate_shared_fdb(struct mlx5_core_dev *primary) goto unlock; } + /* Initialize vport metadata for all group devices. This is deferred + * from esw_offloads_enable() because mlx5_sd_pf_num_get() requires + * the SD group to be ready. + */ + mlx5_sd_for_each_dev(i, primary, pos) { + struct mlx5_eswitch *esw = pos->priv.eswitch; + + err = mlx5_esw_offloads_init_deferred_metadata(esw); + if (err) { + sd_warn(primary, "Failed to init metadata for %s: %d\n", + dev_name(pos->device), err); + goto unlock; + } + } + err = mlx5_lag_shared_fdb_create(ldev, NULL, 0, sd->group_id); if (err) sd_warn(primary, "Failed to create shared FDB: %d\n", err);