]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
pds_core: Fix pdsc_check_pci_health function to use work thread
authorBrett Creeley <brett.creeley@amd.com>
Mon, 8 Apr 2024 16:35:40 +0000 (09:35 -0700)
committerDavid S. Miller <davem@davemloft.net>
Wed, 10 Apr 2024 07:30:10 +0000 (08:30 +0100)
When the driver notices fw_status == 0xff it tries to perform a PCI
reset on itself via pci_reset_function() in the context of the driver's
health thread. However, pdsc_reset_prepare calls
pdsc_stop_health_thread(), which attempts to stop/flush the health
thread. This results in a deadlock because the stop/flush will never
complete since the driver called pci_reset_function() from the health
thread context. Fix by changing the pdsc_check_pci_health_function()
to queue a newly introduced pdsc_pci_reset_thread() on the pdsc's
work queue.

Unloading the driver in the fw_down/dead state uncovered another issue,
which can be seen in the following trace:

WARNING: CPU: 51 PID: 6914 at kernel/workqueue.c:1450 __queue_work+0x358/0x440
[...]
RIP: 0010:__queue_work+0x358/0x440
[...]
Call Trace:
 <TASK>
 ? __warn+0x85/0x140
 ? __queue_work+0x358/0x440
 ? report_bug+0xfc/0x1e0
 ? handle_bug+0x3f/0x70
 ? exc_invalid_op+0x17/0x70
 ? asm_exc_invalid_op+0x1a/0x20
 ? __queue_work+0x358/0x440
 queue_work_on+0x28/0x30
 pdsc_devcmd_locked+0x96/0xe0 [pds_core]
 pdsc_devcmd_reset+0x71/0xb0 [pds_core]
 pdsc_teardown+0x51/0xe0 [pds_core]
 pdsc_remove+0x106/0x200 [pds_core]
 pci_device_remove+0x37/0xc0
 device_release_driver_internal+0xae/0x140
 driver_detach+0x48/0x90
 bus_remove_driver+0x6d/0xf0
 pci_unregister_driver+0x2e/0xa0
 pdsc_cleanup_module+0x10/0x780 [pds_core]
 __x64_sys_delete_module+0x142/0x2b0
 ? syscall_trace_enter.isra.18+0x126/0x1a0
 do_syscall_64+0x3b/0x90
 entry_SYSCALL_64_after_hwframe+0x72/0xdc
RIP: 0033:0x7fbd9d03a14b
[...]

Fix this by preventing the devcmd reset if the FW is not running.

Fixes: d9407ff11809 ("pds_core: Prevent health thread from running during reset/remove")
Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
Signed-off-by: Brett Creeley <brett.creeley@amd.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/amd/pds_core/core.c
drivers/net/ethernet/amd/pds_core/core.h
drivers/net/ethernet/amd/pds_core/dev.c
drivers/net/ethernet/amd/pds_core/main.c

index 9662ee72814c0c64fab3ca08db99e888faa51124..536635e5772799e17ef31857c655735c5ef88865 100644 (file)
@@ -593,6 +593,16 @@ err_out:
        pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
 }
 
+void pdsc_pci_reset_thread(struct work_struct *work)
+{
+       struct pdsc *pdsc = container_of(work, struct pdsc, pci_reset_work);
+       struct pci_dev *pdev = pdsc->pdev;
+
+       pci_dev_get(pdev);
+       pci_reset_function(pdev);
+       pci_dev_put(pdev);
+}
+
 static void pdsc_check_pci_health(struct pdsc *pdsc)
 {
        u8 fw_status;
@@ -607,7 +617,8 @@ static void pdsc_check_pci_health(struct pdsc *pdsc)
        if (fw_status != PDS_RC_BAD_PCI)
                return;
 
-       pci_reset_function(pdsc->pdev);
+       /* prevent deadlock between pdsc_reset_prepare and pdsc_health_thread */
+       queue_work(pdsc->wq, &pdsc->pci_reset_work);
 }
 
 void pdsc_health_thread(struct work_struct *work)
index 92d7657dd6147e7770b7d72f8ee25deb303370b9..a3e17a0c187a6a4a5eaab7207f7089b2172b8e0e 100644 (file)
@@ -197,6 +197,7 @@ struct pdsc {
        struct pdsc_qcq notifyqcq;
        u64 last_eid;
        struct pdsc_viftype *viftype_status;
+       struct work_struct pci_reset_work;
 };
 
 /** enum pds_core_dbell_bits - bitwise composition of dbell values.
@@ -313,5 +314,6 @@ int pdsc_firmware_update(struct pdsc *pdsc, const struct firmware *fw,
 
 void pdsc_fw_down(struct pdsc *pdsc);
 void pdsc_fw_up(struct pdsc *pdsc);
+void pdsc_pci_reset_thread(struct work_struct *work);
 
 #endif /* _PDSC_H_ */
index e494e1298dc9a36c175a6b86623450c0da55ad7b..495ef4ef8c103d6fcacd8b155dbc09e42d68345c 100644 (file)
@@ -229,6 +229,9 @@ int pdsc_devcmd_reset(struct pdsc *pdsc)
                .reset.opcode = PDS_CORE_CMD_RESET,
        };
 
+       if (!pdsc_is_fw_running(pdsc))
+               return 0;
+
        return pdsc_devcmd(pdsc, &cmd, &comp, pdsc->devcmd_timeout);
 }
 
index ab6133e7db422d3579291e3476efb48e5e0be06c..660268ff95623fbe9c86ee2078913c85a3579a5a 100644 (file)
@@ -239,6 +239,7 @@ static int pdsc_init_pf(struct pdsc *pdsc)
        snprintf(wq_name, sizeof(wq_name), "%s.%d", PDS_CORE_DRV_NAME, pdsc->uid);
        pdsc->wq = create_singlethread_workqueue(wq_name);
        INIT_WORK(&pdsc->health_work, pdsc_health_thread);
+       INIT_WORK(&pdsc->pci_reset_work, pdsc_pci_reset_thread);
        timer_setup(&pdsc->wdtimer, pdsc_wdtimer_cb, 0);
        pdsc->wdtimer_period = PDSC_WATCHDOG_SECS * HZ;