git.ipfire.org Git - thirdparty/linux.git/commitdiff
bnxt_en: bring back rtnl_lock() in the bnxt_open() path
author Michael Chan <michael.chan@broadcom.com>
Wed, 14 May 2025 06:29:08 +0000 (23:29 -0700)
committer Jakub Kicinski <kuba@kernel.org>
Thu, 15 May 2025 14:14:15 +0000 (07:14 -0700)
Error recovery, PCIe AER, resume, and TX timeout will invoke bnxt_open()
with netdev_lock only.  This will cause RTNL assert failures in helpers
that still require rtnl_lock, such as netif_set_real_num_tx_queues() and
netif_set_real_num_rx_queues().

Example error recovery assert:

RTNL: assertion failed at net/core/dev.c (3178)
WARNING: CPU: 3 PID: 3392 at net/core/dev.c:3178 netif_set_real_num_tx_queues+0x1fd/0x210

Call Trace:
 <TASK>
 ? __pfx_bnxt_msix+0x10/0x10 [bnxt_en]
 __bnxt_open_nic+0x1ef/0xb20 [bnxt_en]
 bnxt_open+0xda/0x130 [bnxt_en]
 bnxt_fw_reset_task+0x21f/0x780 [bnxt_en]
 process_scheduled_works+0x9d/0x400

For now, bring back rtnl_lock() in all these code paths that can invoke
bnxt_open().  In the bnxt_queue_start() error path, we don't have
rtnl_lock held so we just change it to call netif_close() instead of
bnxt_reset_task() for simplicity.  This error path is unlikely so it
should be fine.

Fixes: 004b5008016a ("eth: bnxt: remove most dependencies on RTNL")
Reviewed-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Link: https://patch.msgid.link/20250514062908.2766677-1-michael.chan@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/broadcom/bnxt/bnxt.c

index 86a5de44b6f3fe4afc4a1885f6ee21249cd5a4cf..6afc2ab6fad22883fa6b73716826851bfe9ac529 100644 (file)
@@ -14013,13 +14013,28 @@ static void bnxt_unlock_sp(struct bnxt *bp)
        netdev_unlock(bp->dev);
 }
 
+/* Same as bnxt_lock_sp() with additional rtnl_lock */
+static void bnxt_rtnl_lock_sp(struct bnxt *bp)
+{
+       clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
+       rtnl_lock();
+       netdev_lock(bp->dev);
+}
+
+static void bnxt_rtnl_unlock_sp(struct bnxt *bp)
+{
+       set_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
+       netdev_unlock(bp->dev);
+       rtnl_unlock();
+}
+
 /* Only called from bnxt_sp_task() */
 static void bnxt_reset(struct bnxt *bp, bool silent)
 {
-       bnxt_lock_sp(bp);
+       bnxt_rtnl_lock_sp(bp);
        if (test_bit(BNXT_STATE_OPEN, &bp->state))
                bnxt_reset_task(bp, silent);
-       bnxt_unlock_sp(bp);
+       bnxt_rtnl_unlock_sp(bp);
 }
 
 /* Only called from bnxt_sp_task() */
@@ -14027,9 +14042,9 @@ static void bnxt_rx_ring_reset(struct bnxt *bp)
 {
        int i;
 
-       bnxt_lock_sp(bp);
+       bnxt_rtnl_lock_sp(bp);
        if (!test_bit(BNXT_STATE_OPEN, &bp->state)) {
-               bnxt_unlock_sp(bp);
+               bnxt_rtnl_unlock_sp(bp);
                return;
        }
        /* Disable and flush TPA before resetting the RX ring */
@@ -14068,7 +14083,7 @@ static void bnxt_rx_ring_reset(struct bnxt *bp)
        }
        if (bp->flags & BNXT_FLAG_TPA)
                bnxt_set_tpa(bp, true);
-       bnxt_unlock_sp(bp);
+       bnxt_rtnl_unlock_sp(bp);
 }
 
 static void bnxt_fw_fatal_close(struct bnxt *bp)
@@ -14960,15 +14975,17 @@ static void bnxt_fw_reset_task(struct work_struct *work)
                bp->fw_reset_state = BNXT_FW_RESET_STATE_OPENING;
                fallthrough;
        case BNXT_FW_RESET_STATE_OPENING:
-               while (!netdev_trylock(bp->dev)) {
+               while (!rtnl_trylock()) {
                        bnxt_queue_fw_reset_work(bp, HZ / 10);
                        return;
                }
+               netdev_lock(bp->dev);
                rc = bnxt_open(bp->dev);
                if (rc) {
                        netdev_err(bp->dev, "bnxt_open() failed during FW reset\n");
                        bnxt_fw_reset_abort(bp, rc);
                        netdev_unlock(bp->dev);
+                       rtnl_unlock();
                        goto ulp_start;
                }
 
@@ -14988,6 +15005,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
                        bnxt_dl_health_fw_status_update(bp, true);
                }
                netdev_unlock(bp->dev);
+               rtnl_unlock();
                bnxt_ulp_start(bp, 0);
                bnxt_reenable_sriov(bp);
                netdev_lock(bp->dev);
@@ -15936,7 +15954,7 @@ err_reset:
                   rc);
        napi_enable_locked(&bnapi->napi);
        bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons);
-       bnxt_reset_task(bp, true);
+       netif_close(dev);
        return rc;
 }
 
@@ -16752,6 +16770,7 @@ static int bnxt_resume(struct device *device)
        struct bnxt *bp = netdev_priv(dev);
        int rc = 0;
 
+       rtnl_lock();
        netdev_lock(dev);
        rc = pci_enable_device(bp->pdev);
        if (rc) {
@@ -16796,6 +16815,7 @@ static int bnxt_resume(struct device *device)
 
 resume_exit:
        netdev_unlock(bp->dev);
+       rtnl_unlock();
        bnxt_ulp_start(bp, rc);
        if (!rc)
                bnxt_reenable_sriov(bp);
@@ -16961,6 +16981,7 @@ static void bnxt_io_resume(struct pci_dev *pdev)
        int err;
 
        netdev_info(bp->dev, "PCI Slot Resume\n");
+       rtnl_lock();
        netdev_lock(netdev);
 
        err = bnxt_hwrm_func_qcaps(bp);
@@ -16978,6 +16999,7 @@ static void bnxt_io_resume(struct pci_dev *pdev)
                netif_device_attach(netdev);
 
        netdev_unlock(netdev);
+       rtnl_unlock();
        bnxt_ulp_start(bp, err);
        if (!err)
                bnxt_reenable_sriov(bp);