git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
net/mlx5: Stop waiting for PCI if pci channel is offline
author: Moshe Shemesh <moshe@nvidia.com>
Mon, 3 Jun 2024 21:04:42 +0000 (00:04 +0300)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 5 Jul 2024 07:08:11 +0000 (09:08 +0200)
[ Upstream commit 33afbfcc105a572159750f2ebee834a8a70fdd96 ]

If the PCI channel goes offline, the driver should not wait for PCI
reads during the health dump and recovery flows. The driver has a timeout
for each of the loops that try to read over PCI, so they would fail anyway.
However, during recovery, waiting until the timeout expires may cause the
PCI error_detected() callback to fail to meet the pci_dpc_recovered() wait
timeout.

Fixes: b3bd076f7501 ("net/mlx5: Report devlink health on FW fatal issues")
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Shay Drori <shayd@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
drivers/net/ethernet/mellanox/mlx5/core/fw.c
drivers/net/ethernet/mellanox/mlx5/core/health.c
drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c

index 13e86f0b42f54b9cf0dcae65d246b9c8f4caaaac..43e4bc222cfa74c4638fbd0d7aeaf903ef64e3d8 100644 (file)
@@ -337,6 +337,10 @@ int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev)
        do {
                if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
                        break;
+               if (pci_channel_offline(dev->pdev)) {
+                       mlx5_core_err(dev, "PCI channel offline, stop waiting for NIC IFC\n");
+                       return -EACCES;
+               }
 
                cond_resched();
        } while (!time_after(jiffies, end));
index f628887d8af8c1a8e30e5f0f88dbcec5e9425fbb..d4ad0e4192bbe9d3c47c6e6b18ceac7b7d26691e 100644 (file)
@@ -250,6 +250,10 @@ recover_from_sw_reset:
        do {
                if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
                        break;
+               if (pci_channel_offline(dev->pdev)) {
+                       mlx5_core_err(dev, "PCI channel offline, stop waiting for NIC IFC\n");
+                       goto unlock;
+               }
 
                msleep(20);
        } while (!time_after(jiffies, end));
@@ -322,6 +326,10 @@ static int mlx5_health_try_recover(struct mlx5_core_dev *dev)
                                      "health recovery flow aborted, PCI reads still not working\n");
                        return -EIO;
                }
+               if (pci_channel_offline(dev->pdev)) {
+                       mlx5_core_err(dev, "PCI channel offline, stop waiting for PCI\n");
+                       return -EACCES;
+               }
                msleep(100);
        }
 
index 6b774e0c2766594250271a2931b77b4540e7ba7c..d0b595ba611014bbfe16712506daf035a012fd7e 100644 (file)
@@ -74,6 +74,10 @@ int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev)
                        ret = -EBUSY;
                        goto pci_unlock;
                }
+               if (pci_channel_offline(dev->pdev)) {
+                       ret = -EACCES;
+                       goto pci_unlock;
+               }
 
                /* Check if semaphore is already locked */
                ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val);