]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
net/mlx5: Always drain health in shutdown callback
author Shay Drory <shayd@nvidia.com>
Tue, 30 Jul 2024 06:16:30 +0000 (09:16 +0300)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 11 Aug 2024 10:57:54 +0000 (12:57 +0200)
[ Upstream commit 1b75da22ed1e6171e261bc9265370162553d5393 ]

There is no point in attempting recovery during device shutdown. If the
health work has already started, we need to wait for it to finish in order
to avoid races and NULL pointer access.

Hence, drain the health WQ in the shutdown callback.

Fixes: 1958fc2f0712 ("net/mlx5: SF, Add auxiliary device driver")
Fixes: d2aa060d40fa ("net/mlx5: Cancel health poll before sending panic teardown command")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Wojciech Drewek <wojciech.drewek@intel.com>
Link: https://patch.msgid.link/20240730061638.1831002-2-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c

index 459a836a5d9c15321409dc75ebc6a84e66c3fac7..3e55a6c6a7c9bf420a0bc3d1718412c793f4a8f6 100644 (file)
@@ -2140,7 +2140,6 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
        /* Panic tear down fw command will stop the PCI bus communication
         * with the HCA, so the health poll is no longer needed.
         */
-       mlx5_drain_health_wq(dev);
        mlx5_stop_health_poll(dev, false);
 
        ret = mlx5_cmd_fast_teardown_hca(dev);
@@ -2175,6 +2174,7 @@ static void shutdown(struct pci_dev *pdev)
 
        mlx5_core_info(dev, "Shutdown was called\n");
        set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
+       mlx5_drain_health_wq(dev);
        err = mlx5_try_fast_unload(dev);
        if (err)
                mlx5_unload_one(dev, false);
index b2986175d9afe80abb0e109fcfe56590f14b0cf7..b706f1486504a7c90049d9071c6541a7016067de 100644 (file)
@@ -112,6 +112,7 @@ static void mlx5_sf_dev_shutdown(struct auxiliary_device *adev)
        struct mlx5_core_dev *mdev = sf_dev->mdev;
 
        set_bit(MLX5_BREAK_FW_WAIT, &mdev->intf_state);
+       mlx5_drain_health_wq(mdev);
        mlx5_unload_one(mdev, false);
 }