From: Cosmin Ratiu
Date: Thu, 31 Oct 2024 12:58:52 +0000 (+0200)
Subject: net/mlx5: Rework esw qos domain init and cleanup
X-Git-Tag: v6.13-rc1~135^2~147^2~4
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=cac7356c653d1410838209b6e840a705898d1811;p=thirdparty%2Fkernel%2Flinux.git

net/mlx5: Rework esw qos domain init and cleanup

The first approach was flawed, because there are situations where the
esw mode change fails, leaving the qos domain as NULL. Various calls
into the QoS infra then trigger a NULL pointer access and unhappiness.

Improve that by a combination of:
- Allocating the QoS domain on esw init and cleaning it up on teardown.
- Refactoring mode change to only call qos domain init but not cleanup.
- Making qos domain init idempotent - not change anything if nothing
  needs changing.

Together, these should guarantee that, as long as the memory
allocations succeed, there should always be a valid qos domain until
the esw cleanup, no matter what mode changes happen (or failures
thereof).

Fixes: 107a034d5c1e ("net/mlx5: qos: Store rate groups in a qos domain")
Signed-off-by: Cosmin Ratiu
Reviewed-by: Carolina Jubran
Signed-off-by: Tariq Toukan
Link: https://patch.msgid.link/20241031125856.530927-2-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski
---

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
index 7e7f99b38a372..940e1c2d1e398 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -951,6 +951,9 @@ static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *
 
 int mlx5_esw_qos_init(struct mlx5_eswitch *esw)
 {
+	if (esw->qos.domain)
+		return 0; /* Nothing to change. */
+
 	return esw_qos_domain_init(esw);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 09719e9b86113..cead41ddbc387 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1485,7 +1485,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs)
 
 	err = mlx5_esw_qos_init(esw);
 	if (err)
-		goto err_qos_init;
+		goto err_esw_init;
 
 	if (esw->mode == MLX5_ESWITCH_LEGACY) {
 		err = esw_legacy_enable(esw);
@@ -1495,7 +1495,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs)
 	}
 
 	if (err)
-		goto err_esw_enable;
+		goto err_esw_init;
 
 	esw->fdb_table.flags |= MLX5_ESW_FDB_CREATED;
 
@@ -1509,9 +1509,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs)
 
 	return 0;
 
-err_esw_enable:
-	mlx5_esw_qos_cleanup(esw);
-err_qos_init:
+err_esw_init:
 	mlx5_eq_notifier_unregister(esw->dev, &esw->nb);
 	mlx5_esw_acls_ns_cleanup(esw);
 	return err;
@@ -1640,7 +1638,6 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw)
 
 	if (esw->mode == MLX5_ESWITCH_OFFLOADS)
 		devl_rate_nodes_destroy(devlink);
-	mlx5_esw_qos_cleanup(esw);
 }
 
 void mlx5_eswitch_disable(struct mlx5_eswitch *esw)
@@ -1884,6 +1881,11 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 	if (err)
 		goto reps_err;
 
+	esw->mode = MLX5_ESWITCH_LEGACY;
+	err = mlx5_esw_qos_init(esw);
+	if (err)
+		goto reps_err;
+
 	mutex_init(&esw->offloads.encap_tbl_lock);
 	hash_init(esw->offloads.encap_tbl);
 	mutex_init(&esw->offloads.decap_tbl_lock);
@@ -1897,7 +1899,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 	refcount_set(&esw->qos.refcnt, 0);
 
 	esw->enabled_vports = 0;
-	esw->mode = MLX5_ESWITCH_LEGACY;
 	esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
 	if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) &&
 	    MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))
@@ -1934,6 +1935,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
 
 	esw_info(esw->dev, "cleanup\n");
 
+	mlx5_esw_qos_cleanup(esw);
 	destroy_workqueue(esw->work_queue);
 	WARN_ON(refcount_read(&esw->qos.refcnt));
 	mutex_destroy(&esw->state_lock);
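
For reference, below is a minimal standalone C sketch of the lifetime pattern the
commit message describes (allocate the qos domain once at eswitch init, keep init
idempotent so mode changes can call it again safely, free only at eswitch cleanup).
It is not the mlx5 driver code; all names in it (esw_sketch, qos_domain_sketch,
esw_qos_init_sketch, ...) are hypothetical and chosen only for illustration.

/*
 * Standalone userspace sketch of the qos domain lifetime pattern:
 * - init allocates the domain once and is a no-op on later calls,
 * - cleanup runs only at teardown, never on a mode change.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct qos_domain_sketch {
	int placeholder;	/* stands in for the real rate-group state */
};

struct esw_sketch {
	struct qos_domain_sketch *qos_domain;
};

/* Idempotent init: if a domain already exists, leave it untouched. */
static int esw_qos_init_sketch(struct esw_sketch *esw)
{
	if (esw->qos_domain)
		return 0; /* Nothing to change. */

	esw->qos_domain = calloc(1, sizeof(*esw->qos_domain));
	return esw->qos_domain ? 0 : -ENOMEM;
}

/* Called only at eswitch teardown, never on a mode change. */
static void esw_qos_cleanup_sketch(struct esw_sketch *esw)
{
	free(esw->qos_domain);
	esw->qos_domain = NULL;
}

int main(void)
{
	struct esw_sketch esw = { 0 };

	if (esw_qos_init_sketch(&esw))	/* eswitch init: allocate once */
		return 1;

	/*
	 * A later mode change (even one that fails) may call init again;
	 * the existing domain is kept, so QoS callers never see NULL.
	 */
	esw_qos_init_sketch(&esw);
	printf("qos domain valid: %s\n", esw.qos_domain ? "yes" : "no");

	esw_qos_cleanup_sketch(&esw);	/* only eswitch cleanup frees it */
	return 0;
}

Keeping cleanup out of the mode-change path is what gives the guarantee stated in the
commit message: as long as the allocation succeeds, the domain stays valid from
eswitch init to eswitch cleanup, regardless of how many mode changes succeed or fail.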