]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
scsi: mpi3mr: Record and report controller firmware faults
author Ranjan Kumar <ranjan.kumar@broadcom.com>
Fri, 16 Jan 2026 06:07:17 +0000 (11:37 +0530)
committer Martin K. Petersen <martin.petersen@oracle.com>
Sat, 24 Jan 2026 03:16:16 +0000 (22:16 -0500)
Capture and retain firmware fault codes and extended fault information
whenever the controller enters a fault state.

Maintain a persistent firmware fault counter, expose it via sysfs, and
generate uevents to aid userspace diagnostics and failure analysis.

Co-developed-by: Salomon Dushimirimana <salomondush@google.com>
Signed-off-by: Salomon Dushimirimana <salomondush@google.com>
Signed-off-by: Ranjan Kumar <ranjan.kumar@broadcom.com>
Link: https://patch.msgid.link/20260116060719.32937-7-ranjan.kumar@broadcom.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/mpi3mr/mpi3mr.h
drivers/scsi/mpi3mr/mpi3mr_app.c
drivers/scsi/mpi3mr/mpi3mr_fw.c

index 590c017acf25cb628223eb69ed21eea64985daa5..58db60e13c13141d01548ed74b64cb032b23f18f 100644 (file)
@@ -1137,6 +1137,10 @@ struct scmd_priv {
  * @default_qcount: Total Default queues
  * @active_poll_qcount: Currently active poll queue count
  * @requested_poll_qcount: User requested poll queue count
+ * @fault_during_init: Indicates a firmware fault occurred during initialization
+ * @saved_fault_code: Firmware fault code captured at the time of failure
+ * @saved_fault_info: Additional firmware-provided fault information
+ * @fwfault_counter: Count of firmware faults detected by the driver
  * @bsg_dev: BSG device structure
  * @bsg_queue: Request queue for BSG device
  * @stop_bsgs: Stop BSG request flag
@@ -1340,6 +1344,10 @@ struct mpi3mr_ioc {
        u16 default_qcount;
        u16 active_poll_qcount;
        u16 requested_poll_qcount;
+       u8 fault_during_init;
+       u32 saved_fault_code;
+       u32 saved_fault_info[3];
+       u64 fwfault_counter;
 
        struct device bsg_dev;
        struct request_queue *bsg_queue;
index 37cca0573ddc3373fd34aa855638ba5bac17e8a1..1353a8ff9c85d5bde88a97a13ed1d4f127076b8b 100644 (file)
@@ -3255,6 +3255,29 @@ adp_state_show(struct device *dev, struct device_attribute *attr,
 
 static DEVICE_ATTR_RO(adp_state);
 
+/**
+ * fwfault_count_show() - SysFS callback to show firmware fault count
+ * @dev: class device
+ * @attr: Device attribute
+ * @buf: Buffer to copy data into
+ *
+ * Emits the current value of the adapter's fwfault_counter field as an
+ * unsigned decimal number followed by a newline.
+ *
+ * Return: Number of bytes written to @buf
+ */
+static ssize_t
+fwfault_count_show(struct device *dev, struct device_attribute *attr,
+       char *buf)
+{
+       struct Scsi_Host *shost = class_to_shost(dev);
+       struct mpi3mr_ioc *mrioc = shost_priv(shost);
+
+       /* show() callbacks should format through sysfs_emit(), not snprintf() */
+       return sysfs_emit(buf, "%llu\n", mrioc->fwfault_counter);
+}
+static DEVICE_ATTR_RO(fwfault_count);
+
 static struct attribute *mpi3mr_host_attrs[] = {
        &dev_attr_version_fw.attr,
        &dev_attr_fw_queue_depth.attr,
@@ -3263,6 +3286,7 @@ static struct attribute *mpi3mr_host_attrs[] = {
        &dev_attr_reply_qfull_count.attr,
        &dev_attr_logging_level.attr,
        &dev_attr_adp_state.attr,
+       &dev_attr_fwfault_count.attr,
        NULL,
 };
 
index 178738850541f28a428d8a6b6a6a1904231c0c2e..0d7515e7144bdf79cb2125bd7248da17364a680d 100644 (file)
@@ -1108,6 +1108,31 @@ void mpi3mr_print_fault_info(struct mpi3mr_ioc *mrioc)
        }
 }
 
+/**
+ * mpi3mr_save_fault_info - Save fault information
+ * @mrioc: Adapter instance reference
+ *
+ * Read the IOC status register and, if its fault bit is set, cache the
+ * masked fault code in @mrioc->saved_fault_code and one fault info
+ * register per slot of @mrioc->saved_fault_info. The cached values are
+ * left untouched when the fault bit is not set.
+ *
+ * Return: Nothing.
+ */
+static void mpi3mr_save_fault_info(struct mpi3mr_ioc *mrioc)
+{
+       u32 ioc_status, i;
+
+       ioc_status = readl(&mrioc->sysif_regs->ioc_status);
+       if (!(ioc_status & MPI3_SYSIF_IOC_STATUS_FAULT))
+               return;
+
+       mrioc->saved_fault_code = readl(&mrioc->sysif_regs->fault) &
+           MPI3_SYSIF_FAULT_CODE_MASK;
+
+       /* Bound the copy by the destination array rather than a literal */
+       for (i = 0; i < ARRAY_SIZE(mrioc->saved_fault_info); i++)
+               mrioc->saved_fault_info[i] =
+                   readl(&mrioc->sysif_regs->fault_info[i]);
+}
+
 /**
  * mpi3mr_get_iocstate - Get IOC State
  * @mrioc: Adapter instance reference
@@ -1249,6 +1274,60 @@ out_failed:
        mpi3mr_free_ioctl_dma_memory(mrioc);
 }
 
+/**
+ * mpi3mr_fault_uevent_emit - Emit uevent for any controller
+ * fault
+ * @mrioc: Pointer to the mpi3mr_ioc structure for the controller instance
+ *
+ * Build a uevent environment holding the driver name, the IOC id, the
+ * saved fault code and the three saved fault info words, then send it
+ * as a KOBJ_CHANGE event on the SCSI host's generic device. Nothing is
+ * sent if the environment cannot be allocated or if adding any of the
+ * variables fails.
+ */
+static void mpi3mr_fault_uevent_emit(struct mpi3mr_ioc *mrioc)
+{
+       struct kobj_uevent_env *uevent_env;
+
+       uevent_env = kzalloc(sizeof(*uevent_env), GFP_KERNEL);
+       if (!uevent_env)
+               return;
+
+       /*
+        * add_uevent_var() returns non-zero on failure; the && chain stops
+        * at the first failing variable and skips the notification.
+        */
+       if (!add_uevent_var(uevent_env, "DRIVER=%s", mrioc->driver_name) &&
+           !add_uevent_var(uevent_env, "IOC_ID=%u", mrioc->id) &&
+           !add_uevent_var(uevent_env, "FAULT_CODE=0x%08x",
+                           mrioc->saved_fault_code) &&
+           !add_uevent_var(uevent_env, "FAULT_INFO0=0x%08x",
+                           mrioc->saved_fault_info[0]) &&
+           !add_uevent_var(uevent_env, "FAULT_INFO1=0x%08x",
+                           mrioc->saved_fault_info[1]) &&
+           !add_uevent_var(uevent_env, "FAULT_INFO2=0x%08x",
+                           mrioc->saved_fault_info[2]))
+               kobject_uevent_env(&mrioc->shost->shost_gendev.kobj,
+                                  KOBJ_CHANGE, uevent_env->envp);
+
+       kfree(uevent_env);
+}
+
 /**
  * mpi3mr_clear_reset_history - clear reset history
  * @mrioc: Adapter instance reference
@@ -1480,6 +1559,10 @@ retry_bring_ioc_ready:
                if (ioc_state == MRIOC_STATE_FAULT) {
                        timeout = MPI3_SYSIF_DIAG_SAVE_TIMEOUT * 10;
                        mpi3mr_print_fault_info(mrioc);
+                       mpi3mr_save_fault_info(mrioc);
+                       mrioc->fault_during_init = 1;
+                       mrioc->fwfault_counter++;
+
                        do {
                                host_diagnostic =
                                        readl(&mrioc->sysif_regs->host_diagnostic);
@@ -2577,6 +2660,9 @@ void mpi3mr_check_rh_fault_ioc(struct mpi3mr_ioc *mrioc, u32 reason_code)
                mpi3mr_set_trigger_data_in_all_hdb(mrioc,
                    MPI3MR_HDB_TRIGGER_TYPE_FAULT, &trigger_data, 0);
                mpi3mr_print_fault_info(mrioc);
+               mpi3mr_save_fault_info(mrioc);
+               mrioc->fault_during_init = 1;
+               mrioc->fwfault_counter++;
                return;
        }
 
@@ -2594,6 +2680,10 @@ void mpi3mr_check_rh_fault_ioc(struct mpi3mr_ioc *mrioc, u32 reason_code)
                        break;
                msleep(100);
        } while (--timeout);
+
+       mpi3mr_save_fault_info(mrioc);
+       mrioc->fault_during_init = 1;
+       mrioc->fwfault_counter++;
 }
 
 /**
@@ -2770,6 +2860,11 @@ static void mpi3mr_watchdog_work(struct work_struct *work)
        union mpi3mr_trigger_data trigger_data;
        u16 reset_reason = MPI3MR_RESET_FROM_FAULT_WATCH;
 
+       if (mrioc->fault_during_init) {
+               mpi3mr_fault_uevent_emit(mrioc);
+               mrioc->fault_during_init = 0;
+       }
+
        if (mrioc->reset_in_progress || mrioc->pci_err_recovery)
                return;
 
@@ -2842,6 +2937,10 @@ static void mpi3mr_watchdog_work(struct work_struct *work)
                goto schedule_work;
        }
 
+       mpi3mr_save_fault_info(mrioc);
+       mpi3mr_fault_uevent_emit(mrioc);
+       mrioc->fwfault_counter++;
+
        switch (trigger_data.fault) {
        case MPI3_SYSIF_FAULT_CODE_COMPLETE_RESET_NEEDED:
        case MPI3_SYSIF_FAULT_CODE_POWER_CYCLE_REQUIRED:
@@ -5478,6 +5577,10 @@ int mpi3mr_soft_reset_handler(struct mpi3mr_ioc *mrioc,
                                        break;
                                msleep(100);
                        } while (--timeout);
+
+                       mpi3mr_save_fault_info(mrioc);
+                       mpi3mr_fault_uevent_emit(mrioc);
+                       mrioc->fwfault_counter++;
                        mpi3mr_set_trigger_data_in_all_hdb(mrioc,
                            MPI3MR_HDB_TRIGGER_TYPE_FAULT, &trigger_data, 0);
                }