git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
net/mlx5: Nack sync reset when SFs are present
author Moshe Shemesh <moshe@nvidia.com>
Mon, 25 Aug 2025 14:34:30 +0000 (17:34 +0300)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 4 Sep 2025 13:31:49 +0000 (15:31 +0200)
[ Upstream commit 26e42ec7712d392d561964514b1f253b1a96f42d ]

If a PF (Physical Function) has SFs (Sub-Functions), a sync reset can
lead to a fatal error on the SFs: the SFs do not take part in the
synchronization flow, so from the SF point of view the function is
closed unexpectedly.

Add a check to prevent sync reset when there are SFs on a PF device
which is not ECPF, as ECPF is torn down gracefully before reset.

Fixes: 92501fa6e421 ("net/mlx5: Ack on sync_reset_request only if PF can do reset_now")
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Link: https://patch.msgid.link/20250825143435.598584-8-mbloch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h

index 0829912157c978f26f9e4b1e12f0eadc48121eba..516df7f1997ebc1e24eba61eacb847bf3534b50f 100644 (file)
@@ -6,6 +6,7 @@
 #include "fw_reset.h"
 #include "diag/fw_tracer.h"
 #include "lib/tout.h"
+#include "sf/sf.h"
 
 enum {
        MLX5_FW_RESET_FLAGS_RESET_REQUESTED,
@@ -423,6 +424,11 @@ static bool mlx5_is_reset_now_capable(struct mlx5_core_dev *dev,
                return false;
        }
 
+       if (!mlx5_core_is_ecpf(dev) && !mlx5_sf_table_empty(dev)) {
+               mlx5_core_warn(dev, "SFs should be removed before reset\n");
+               return false;
+       }
+
 #if IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)
        if (reset_method != MLX5_MFRL_REG_PCI_RESET_METHOD_HOT_RESET) {
                err = mlx5_check_hotplug_interrupt(dev);
index b96909fbeb12de3336fd4c0a6f254aedef670c70..bdac3db1bd61dee5b7ea873db5ac0146a5a59d1b 100644 (file)
@@ -518,3 +518,13 @@ void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev)
        WARN_ON(!xa_empty(&table->function_ids));
        kfree(table);
 }
+
+bool mlx5_sf_table_empty(const struct mlx5_core_dev *dev) /* true when dev's SF table has no entries */
+{
+       struct mlx5_sf_table *table = dev->priv.sf_table;
+
+       if (!table) /* no SF table attached to this device: report it as empty */
+               return true;
+
+       return xa_empty(&table->function_ids); /* empty iff the function_ids xarray holds nothing */
+}
index 860f9ddb7107b858bc8b67904826cdd52b7aa4ba..89559a37997ad695dd60b24203fa206ce8838cae 100644 (file)
@@ -17,6 +17,7 @@ void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev);
 
 int mlx5_sf_table_init(struct mlx5_core_dev *dev);
 void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev);
+bool mlx5_sf_table_empty(const struct mlx5_core_dev *dev);
 
 int mlx5_devlink_sf_port_new(struct devlink *devlink,
                             const struct devlink_port_new_attrs *add_attr,
@@ -61,6 +62,11 @@ static inline void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev)
 {
 }
 
+static inline bool mlx5_sf_table_empty(const struct mlx5_core_dev *dev)
+{
+       return true; /* stub: always reports empty; presumably the build without SF support - confirm against the enclosing #if */
+}
+
 #endif
 
 #endif