]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
scsi: mpi3mr: Support PCI Error Recovery callback handlers
author Sumit Saxena <sumit.saxena@broadcom.com>
Thu, 27 Jun 2024 10:17:33 +0000 (15:47 +0530)
committer Martin K. Petersen <martin.petersen@oracle.com>
Fri, 5 Jul 2024 03:37:07 +0000 (23:37 -0400)
PCI Error recovery support is required to recover the controller upon
detection of PCI errors. Add support for the PCI error recovery callback
handlers in mpi3mr driver.

Co-developed-by: Sathya Prakash <sathya.prakash@broadcom.com>
Signed-off-by: Sathya Prakash <sathya.prakash@broadcom.com>
Co-developed-by: Ranjan Kumar <ranjan.kumar@broadcom.com>
Signed-off-by: Ranjan Kumar <ranjan.kumar@broadcom.com>
Signed-off-by: Sumit Saxena <sumit.saxena@broadcom.com>
Link: https://lore.kernel.org/r/20240627101735.18286-2-sumit.saxena@broadcom.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/mpi3mr/mpi3mr.h
drivers/scsi/mpi3mr/mpi3mr_os.c

index c8968f12b9e652957534594ba2f41c5ffc0076d1..2b1d5645ba9b2b81d4e6be5004d3b61317f83c16 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/miscdevice.h>
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <linux/aer.h>
 #include <linux/poll.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
@@ -129,6 +130,7 @@ extern atomic64_t event_counter;
 #define MPI3MR_PREPARE_FOR_RESET_TIMEOUT       180
 #define MPI3MR_RESET_ACK_TIMEOUT               30
 #define MPI3MR_MUR_TIMEOUT                     120
+#define MPI3MR_RESET_TIMEOUT                   510
 
 #define MPI3MR_WATCHDOG_INTERVAL               1000 /* in milli seconds */
 
@@ -1153,6 +1155,8 @@ struct scmd_priv {
  * @trace_release_trigger_active: Trace trigger active flag
  * @fw_release_trigger_active: Fw release trigger active flag
  * @snapdump_trigger_active: Snapdump trigger active flag
+ * @pci_err_recovery: PCI error recovery in progress
+ * @block_on_pci_err: Block IO during PCI error recovery
  */
 struct mpi3mr_ioc {
        struct list_head list;
@@ -1353,6 +1357,8 @@ struct mpi3mr_ioc {
        bool snapdump_trigger_active;
        bool trace_release_trigger_active;
        bool fw_release_trigger_active;
+       bool pci_err_recovery;
+       bool block_on_pci_err;
 };
 
 /**
index eac179dc937004503cdca4fb380856466f5b3613..0986b362e5f0acd62289085777af27ad38fd5b39 100644 (file)
@@ -5546,6 +5546,197 @@ mpi3mr_resume(struct device *dev)
        return 0;
 }
 
+/**
+ * mpi3mr_pcierr_error_detected - PCI error detected callback
+ * @pdev: PCI device instance
+ * @state: channel state
+ *
+ * This function is called by the PCI error recovery driver and
+ * based on the state passed the driver decides what actions to
+ * be recommended back to PCI driver.
+ *
+ * For all of the states if there is no valid mrioc or scsi host
+ * references in the PCI device then this function will return
+ * the result as disconnect.
+ *
+ * For normal state, this function will return the result as can
+ * recover.
+ *
+ * For frozen state, this function will block for any pending
+ * controller initialization or re-initialization to complete
+ * (polling once a second, at most MPI3MR_RESET_TIMEOUT times),
+ * stop any new interactions with the controller and return
+ * status as reset required. If the pending operation does not
+ * complete within that window the controller is marked as
+ * unrecoverable and disconnect is returned.
+ *
+ * For permanent failure state, this function will mark the
+ * controller as unrecoverable and return status as disconnect.
+ *
+ * Any other state is answered with disconnect.
+ *
+ * Returns: PCI_ERS_RESULT_NEED_RESET or CAN_RECOVER or
+ * DISCONNECT based on the controller state.
+ */
+static pci_ers_result_t
+mpi3mr_pcierr_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+{
+       struct Scsi_Host *shost;
+       struct mpi3mr_ioc *mrioc;
+       unsigned int timeout = MPI3MR_RESET_TIMEOUT;
+
+       dev_info(&pdev->dev, "%s: callback invoked state(%d)\n", __func__,
+           state);
+
+       /*
+        * Honour the documented contract: without a valid shost/mrioc
+        * there is nothing to recover, and dereferencing either would
+        * crash inside the error handler.
+        */
+       shost = pci_get_drvdata(pdev);
+       mrioc = shost ? shost_priv(shost) : NULL;
+       if (!mrioc) {
+               dev_err(&pdev->dev, "device not available\n");
+               return PCI_ERS_RESULT_DISCONNECT;
+       }
+
+       switch (state) {
+       case pci_channel_io_normal:
+               return PCI_ERS_RESULT_CAN_RECOVER;
+       case pci_channel_io_frozen:
+               mrioc->pci_err_recovery = true;
+               mrioc->block_on_pci_err = true;
+               /* Let an in-flight reset or driver load finish first. */
+               do {
+                       if (mrioc->reset_in_progress || mrioc->is_driver_loading)
+                               ssleep(1);
+                       else
+                               break;
+               } while (--timeout);
+
+               /* The pending operation never completed: give up. */
+               if (!timeout) {
+                       mrioc->pci_err_recovery = true;
+                       mrioc->block_on_pci_err = true;
+                       mrioc->unrecoverable = 1;
+                       mpi3mr_stop_watchdog(mrioc);
+                       mpi3mr_flush_cmds_for_unrecovered_controller(mrioc);
+                       return PCI_ERS_RESULT_DISCONNECT;
+               }
+
+               /* Quiesce the host and release resources ahead of the reset. */
+               scsi_block_requests(mrioc->shost);
+               mpi3mr_stop_watchdog(mrioc);
+               mpi3mr_cleanup_resources(mrioc);
+               return PCI_ERS_RESULT_NEED_RESET;
+       case pci_channel_io_perm_failure:
+               mrioc->pci_err_recovery = true;
+               mrioc->block_on_pci_err = true;
+               mrioc->unrecoverable = 1;
+               mpi3mr_stop_watchdog(mrioc);
+               mpi3mr_flush_cmds_for_unrecovered_controller(mrioc);
+               return PCI_ERS_RESULT_DISCONNECT;
+       default:
+               return PCI_ERS_RESULT_DISCONNECT;
+       }
+}
+
+/**
+ * mpi3mr_pcierr_slot_reset - Post slot reset callback
+ * @pdev: PCI device instance
+ *
+ * This function is called by the PCI error recovery driver
+ * after a slot or link reset issued by it for the recovery, the
+ * driver is expected to bring back the controller and
+ * initialize it.
+ *
+ * This function restores PCI state and reinitializes controller
+ * resources and the controller, this blocks for any pending
+ * reset to complete (polling once a second, at most
+ * MPI3MR_RESET_TIMEOUT times).
+ *
+ * If there is no valid mrioc or scsi host reference in the PCI
+ * device, disconnect is returned without touching any state.
+ * On any other failure the controller is marked unrecoverable,
+ * the host requests are unblocked and the watchdog is restarted
+ * before disconnect is returned.
+ *
+ * Returns: PCI_ERS_RESULT_DISCONNECT on failure or
+ * PCI_ERS_RESULT_RECOVERED
+ */
+static pci_ers_result_t mpi3mr_pcierr_slot_reset(struct pci_dev *pdev)
+{
+       struct Scsi_Host *shost;
+       struct mpi3mr_ioc *mrioc;
+       unsigned int timeout = MPI3MR_RESET_TIMEOUT;
+
+       dev_info(&pdev->dev, "%s: callback invoked\n", __func__);
+
+       /*
+        * Bail out directly rather than via out_failed: that label
+        * dereferences both mrioc and shost.
+        */
+       shost = pci_get_drvdata(pdev);
+       mrioc = shost ? shost_priv(shost) : NULL;
+       if (!mrioc) {
+               dev_err(&pdev->dev, "device not available\n");
+               return PCI_ERS_RESULT_DISCONNECT;
+       }
+
+       /* Wait for a reset that is already running to finish. */
+       do {
+               if (mrioc->reset_in_progress)
+                       ssleep(1);
+               else
+                       break;
+       } while (--timeout);
+
+       if (!timeout)
+               goto out_failed;
+
+       pci_restore_state(pdev);
+
+       if (mpi3mr_setup_resources(mrioc)) {
+               ioc_err(mrioc, "setup resources failed\n");
+               goto out_failed;
+       }
+       mrioc->unrecoverable = 0;
+       mrioc->pci_err_recovery = false;
+
+       if (mpi3mr_soft_reset_handler(mrioc, MPI3MR_RESET_FROM_FIRMWARE, 0))
+               goto out_failed;
+
+       return PCI_ERS_RESULT_RECOVERED;
+
+out_failed:
+       mrioc->unrecoverable = 1;
+       mrioc->block_on_pci_err = false;
+       scsi_unblock_requests(shost);
+       mpi3mr_start_watchdog(mrioc);
+       return PCI_ERS_RESULT_DISCONNECT;
+}
+
+/**
+ * mpi3mr_pcierr_resume - PCI error recovery resume
+ * callback
+ * @pdev: PCI device instance
+ *
+ * This function enables all I/O and IOCTLs post reset issued as
+ * part of the PCI error recovery. It only acts when the driver
+ * had blocked on a PCI error (block_on_pci_err set): the flag
+ * is cleared, host requests are unblocked and the watchdog is
+ * restarted. If there is no valid mrioc or scsi host reference
+ * in the PCI device the function returns without doing
+ * anything.
+ *
+ * Return: Nothing.
+ */
+static void mpi3mr_pcierr_resume(struct pci_dev *pdev)
+{
+       struct Scsi_Host *shost;
+       struct mpi3mr_ioc *mrioc;
+
+       dev_info(&pdev->dev, "%s: callback invoked\n", __func__);
+
+       /* Nothing to resume when the device has no host attached. */
+       shost = pci_get_drvdata(pdev);
+       mrioc = shost ? shost_priv(shost) : NULL;
+       if (!mrioc) {
+               dev_err(&pdev->dev, "device not available\n");
+               return;
+       }
+
+       if (mrioc->block_on_pci_err) {
+               mrioc->block_on_pci_err = false;
+               scsi_unblock_requests(shost);
+               mpi3mr_start_watchdog(mrioc);
+       }
+}
+
+/**
+ * mpi3mr_pcierr_mmio_enabled - PCI error recovery callback
+ * @pdev: PCI device instance
+ *
+ * This is called only if mpi3mr_pcierr_error_detected returns
+ * PCI_ERS_RESULT_CAN_RECOVER.
+ *
+ * Return: PCI_ERS_RESULT_DISCONNECT when the controller is
+ * unrecoverable or when the shost/mrioc reference cannot be
+ * found, else return PCI_ERS_RESULT_RECOVERED
+ */
+static pci_ers_result_t mpi3mr_pcierr_mmio_enabled(struct pci_dev *pdev)
+{
+       struct Scsi_Host *shost;
+       struct mpi3mr_ioc *mrioc;
+
+       dev_info(&pdev->dev, "%s: callback invoked\n", __func__);
+
+       /* Implements the documented "reference cannot be found" case. */
+       shost = pci_get_drvdata(pdev);
+       mrioc = shost ? shost_priv(shost) : NULL;
+       if (!mrioc) {
+               dev_err(&pdev->dev, "device not available\n");
+               return PCI_ERS_RESULT_DISCONNECT;
+       }
+
+       if (mrioc->unrecoverable)
+               return PCI_ERS_RESULT_DISCONNECT;
+
+       return PCI_ERS_RESULT_RECOVERED;
+}
+
 static const struct pci_device_id mpi3mr_pci_id_table[] = {
        {
                PCI_DEVICE_SUB(MPI3_MFGPAGE_VENDORID_BROADCOM,
@@ -5563,6 +5754,13 @@ static const struct pci_device_id mpi3mr_pci_id_table[] = {
 };
 MODULE_DEVICE_TABLE(pci, mpi3mr_pci_id_table);
 
+static struct pci_error_handlers mpi3mr_err_handler = {
+       .error_detected = mpi3mr_pcierr_error_detected,
+       .mmio_enabled = mpi3mr_pcierr_mmio_enabled,
+       .slot_reset = mpi3mr_pcierr_slot_reset,
+       .resume = mpi3mr_pcierr_resume,
+};
+
 static SIMPLE_DEV_PM_OPS(mpi3mr_pm_ops, mpi3mr_suspend, mpi3mr_resume);
 
 static struct pci_driver mpi3mr_pci_driver = {
@@ -5571,6 +5769,7 @@ static struct pci_driver mpi3mr_pci_driver = {
        .probe = mpi3mr_probe,
        .remove = mpi3mr_remove,
        .shutdown = mpi3mr_shutdown,
+       .err_handler = &mpi3mr_err_handler,
        .driver.pm = &mpi3mr_pm_ops,
 };