]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
crypto: qat - flush misc workqueue during device shutdown
authorGiovanni Cabiddu <giovanni.cabiddu@intel.com>
Fri, 11 Jul 2025 12:27:43 +0000 (13:27 +0100)
committerHerbert Xu <herbert@gondor.apana.org.au>
Fri, 18 Jul 2025 10:52:00 +0000 (20:52 +1000)
Repeated loading and unloading of a device specific QAT driver, for
example qat_4xxx, in a tight loop can lead to a crash due to a
use-after-free scenario. This occurs when a power management (PM)
interrupt triggers just before the device-specific driver (e.g.,
qat_4xxx.ko) is unloaded, while the core driver (intel_qat.ko) remains
loaded.

Since the driver uses a shared workqueue (`qat_misc_wq`) across all
devices and owned by intel_qat.ko, a deferred routine from the
device-specific driver may still be pending in the queue. If this
routine executes after the driver is unloaded, it can dereference freed
memory, resulting in a page fault and kernel crash like the following:

    BUG: unable to handle page fault for address: ffa000002e50a01c
    #PF: supervisor read access in kernel mode
    RIP: 0010:pm_bh_handler+0x1d2/0x250 [intel_qat]
    Call Trace:
      pm_bh_handler+0x1d2/0x250 [intel_qat]
      process_one_work+0x171/0x340
      worker_thread+0x277/0x3a0
      kthread+0xf0/0x120
      ret_from_fork+0x2d/0x50

To prevent this, flush the misc workqueue during device shutdown to
ensure that all pending work items are completed before the driver is
unloaded.

Note: This approach may slightly increase shutdown latency if the
workqueue contains jobs from other devices, but it ensures correctness
and stability.

Fixes: e5745f34113b ("crypto: qat - enable power management for QAT GEN4")
Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Cc: stable@vger.kernel.org
Reviewed-by: Ahsan Atta <ahsan.atta@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
drivers/crypto/intel/qat/qat_common/adf_common_drv.h
drivers/crypto/intel/qat/qat_common/adf_init.c
drivers/crypto/intel/qat/qat_common/adf_isr.c

index eaa6388a6678b03d16ac35d70c83095435412d71..7a022bd4ae07caefa4d61f4dbf18322eeb484af9 100644 (file)
@@ -189,6 +189,7 @@ void adf_exit_misc_wq(void);
 bool adf_misc_wq_queue_work(struct work_struct *work);
 bool adf_misc_wq_queue_delayed_work(struct delayed_work *work,
                                    unsigned long delay);
+void adf_misc_wq_flush(void);
 #if defined(CONFIG_PCI_IOV)
 int adf_sriov_configure(struct pci_dev *pdev, int numvfs);
 void adf_disable_sriov(struct adf_accel_dev *accel_dev);
index f189cce7d153584bbdd0f7930a7d07333c9c548f..46491048e0bb42175eae61edce8b657ca7d8d5b0 100644 (file)
@@ -404,6 +404,7 @@ static void adf_dev_shutdown(struct adf_accel_dev *accel_dev)
                hw_data->exit_admin_comms(accel_dev);
 
        adf_cleanup_etr_data(accel_dev);
+       adf_misc_wq_flush();
        adf_dev_restore(accel_dev);
 }
 
index cae1aee5479aff04e7c123674b9f5e0cb33f67d9..12e5656136610cd8ac7592f9b5426ccaccbb6196 100644 (file)
@@ -407,3 +407,8 @@ bool adf_misc_wq_queue_delayed_work(struct delayed_work *work,
 {
        return queue_delayed_work(adf_misc_wq, work, delay);
 }
+
+void adf_misc_wq_flush(void)
+{
+       flush_workqueue(adf_misc_wq);
+}