From: Ranjan Kumar
Date: Fri, 16 Jan 2026 06:07:17 +0000 (+0530)
Subject: scsi: mpi3mr: Record and report controller firmware faults
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ec54b348f274fdd2bd32bbe74de6d62ae1a10a18;p=thirdparty%2Fkernel%2Flinux.git

scsi: mpi3mr: Record and report controller firmware faults

Capture and retain firmware fault codes and extended fault information
whenever the controller enters a fault state. Maintain a persistent
firmware fault counter, expose it via sysfs, and generate uevents to aid
userspace diagnostics and failure analysis.

Co-developed-by: Salomon Dushimirimana
Signed-off-by: Salomon Dushimirimana
Signed-off-by: Ranjan Kumar
Link: https://patch.msgid.link/20260116060719.32937-7-ranjan.kumar@broadcom.com
Signed-off-by: Martin K. Petersen
---

diff --git a/drivers/scsi/mpi3mr/mpi3mr.h b/drivers/scsi/mpi3mr/mpi3mr.h
index 590c017acf25c..58db60e13c131 100644
--- a/drivers/scsi/mpi3mr/mpi3mr.h
+++ b/drivers/scsi/mpi3mr/mpi3mr.h
@@ -1137,6 +1137,10 @@ struct scmd_priv {
  * @default_qcount: Total Default queues
  * @active_poll_qcount: Currently active poll queue count
  * @requested_poll_qcount: User requested poll queue count
+ * @fault_during_init: Indicates a firmware fault occurred during initialization
+ * @saved_fault_code: Firmware fault code captured at the time of failure
+ * @saved_fault_info: Additional firmware-provided fault information
+ * @fwfault_counter: Count of firmware faults detected by the driver
  * @bsg_dev: BSG device structure
  * @bsg_queue: Request queue for BSG device
  * @stop_bsgs: Stop BSG request flag
@@ -1340,6 +1344,10 @@ struct mpi3mr_ioc {
 	u16 default_qcount;
 	u16 active_poll_qcount;
 	u16 requested_poll_qcount;
+	u8 fault_during_init;
+	u32 saved_fault_code;
+	u32 saved_fault_info[3];
+	u64 fwfault_counter;
 
 	struct device bsg_dev;
 	struct request_queue *bsg_queue;
diff --git a/drivers/scsi/mpi3mr/mpi3mr_app.c b/drivers/scsi/mpi3mr/mpi3mr_app.c
index 37cca0573ddc3..1353a8ff9c85d 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_app.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_app.c
@@ -3255,6 +3255,29 @@ adp_state_show(struct device *dev, struct device_attribute *attr,
 
 static DEVICE_ATTR_RO(adp_state);
 
+/**
+ * fwfault_count_show() - SysFS callback to show firmware fault count
+ * @dev: class device
+ * @attr: Device attribute
+ * @buf: Buffer to copy data into
+ *
+ * Displays the total number of firmware faults detected by the driver
+ * since the controller was initialized.
+ *
+ * Return: Number of bytes written to @buf
+ */
+
+static ssize_t
+fwfault_count_show(struct device *dev, struct device_attribute *attr,
+	char *buf)
+{
+	struct Scsi_Host *shost = class_to_shost(dev);
+	struct mpi3mr_ioc *mrioc = shost_priv(shost);
+
+	return snprintf(buf, PAGE_SIZE, "%llu\n", mrioc->fwfault_counter);
+}
+static DEVICE_ATTR_RO(fwfault_count);
+
 static struct attribute *mpi3mr_host_attrs[] = {
 	&dev_attr_version_fw.attr,
 	&dev_attr_fw_queue_depth.attr,
@@ -3263,6 +3286,7 @@ static struct attribute *mpi3mr_host_attrs[] = {
 	&dev_attr_reply_qfull_count.attr,
 	&dev_attr_logging_level.attr,
 	&dev_attr_adp_state.attr,
+	&dev_attr_fwfault_count.attr,
 	NULL,
 };
 
diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c
index 178738850541f..0d7515e7144bd 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_fw.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c
@@ -1108,6 +1108,31 @@ void mpi3mr_print_fault_info(struct mpi3mr_ioc *mrioc)
 	}
 }
 
+/**
+ * mpi3mr_save_fault_info - Save fault information
+ * @mrioc: Adapter instance reference
+ *
+ * Save the controller fault information if there is a
+ * controller fault.
+ *
+ * Return: Nothing.
+ */
+static void mpi3mr_save_fault_info(struct mpi3mr_ioc *mrioc)
+{
+	u32 ioc_status, i;
+
+	ioc_status = readl(&mrioc->sysif_regs->ioc_status);
+
+	if (ioc_status & MPI3_SYSIF_IOC_STATUS_FAULT) {
+		mrioc->saved_fault_code = readl(&mrioc->sysif_regs->fault) &
+		    MPI3_SYSIF_FAULT_CODE_MASK;
+		for (i = 0; i < 3; i++) {
+			mrioc->saved_fault_info[i] =
+			readl(&mrioc->sysif_regs->fault_info[i]);
+		}
+	}
+}
+
 /**
  * mpi3mr_get_iocstate - Get IOC State
  * @mrioc: Adapter instance reference
@@ -1249,6 +1274,60 @@ out_failed:
 	mpi3mr_free_ioctl_dma_memory(mrioc);
 }
 
+/**
+ * mpi3mr_fault_uevent_emit - Emit uevent for any controller
+ * fault
+ * @mrioc: Pointer to the mpi3mr_ioc structure for the controller instance
+ *
+ * This function is invoked when the controller undergoes any
+ * type of fault.
+ */
+
+static void mpi3mr_fault_uevent_emit(struct mpi3mr_ioc *mrioc)
+{
+	struct kobj_uevent_env *env;
+	int ret;
+
+	env = kzalloc(sizeof(*env), GFP_KERNEL);
+	if (!env)
+		return;
+
+	ret = add_uevent_var(env, "DRIVER=%s", mrioc->driver_name);
+	if (ret)
+		goto out_free;
+
+	ret = add_uevent_var(env, "IOC_ID=%u", mrioc->id);
+	if (ret)
+		goto out_free;
+
+	ret = add_uevent_var(env, "FAULT_CODE=0x%08x",
+	    mrioc->saved_fault_code);
+	if (ret)
+		goto out_free;
+
+	ret = add_uevent_var(env, "FAULT_INFO0=0x%08x",
+	    mrioc->saved_fault_info[0]);
+	if (ret)
+		goto out_free;
+
+	ret = add_uevent_var(env, "FAULT_INFO1=0x%08x",
+	    mrioc->saved_fault_info[1]);
+	if (ret)
+		goto out_free;
+
+	ret = add_uevent_var(env, "FAULT_INFO2=0x%08x",
+	    mrioc->saved_fault_info[2]);
+	if (ret)
+		goto out_free;
+
+	kobject_uevent_env(&mrioc->shost->shost_gendev.kobj,
+	    KOBJ_CHANGE, env->envp);
+
+out_free:
+	kfree(env);
+
+}
+
 /**
  * mpi3mr_clear_reset_history - clear reset history
  * @mrioc: Adapter instance reference
@@ -1480,6 +1559,10 @@ retry_bring_ioc_ready:
 	if (ioc_state == MRIOC_STATE_FAULT) {
 		timeout = MPI3_SYSIF_DIAG_SAVE_TIMEOUT * 10;
 		mpi3mr_print_fault_info(mrioc);
+		mpi3mr_save_fault_info(mrioc);
+		mrioc->fault_during_init = 1;
+		mrioc->fwfault_counter++;
+
 		do {
 			host_diagnostic =
 			    readl(&mrioc->sysif_regs->host_diagnostic);
@@ -2577,6 +2660,9 @@ void mpi3mr_check_rh_fault_ioc(struct mpi3mr_ioc *mrioc, u32 reason_code)
 		mpi3mr_set_trigger_data_in_all_hdb(mrioc,
 		    MPI3MR_HDB_TRIGGER_TYPE_FAULT, &trigger_data, 0);
 		mpi3mr_print_fault_info(mrioc);
+		mpi3mr_save_fault_info(mrioc);
+		mrioc->fault_during_init = 1;
+		mrioc->fwfault_counter++;
 		return;
 	}
 
@@ -2594,6 +2680,10 @@ void mpi3mr_check_rh_fault_ioc(struct mpi3mr_ioc *mrioc, u32 reason_code)
 			break;
 		msleep(100);
 	} while (--timeout);
+
+	mpi3mr_save_fault_info(mrioc);
+	mrioc->fault_during_init = 1;
+	mrioc->fwfault_counter++;
 }
 
 /**
@@ -2770,6 +2860,11 @@ static void mpi3mr_watchdog_work(struct work_struct *work)
 	union mpi3mr_trigger_data trigger_data;
 	u16 reset_reason = MPI3MR_RESET_FROM_FAULT_WATCH;
 
+	if (mrioc->fault_during_init) {
+		mpi3mr_fault_uevent_emit(mrioc);
+		mrioc->fault_during_init = 0;
+	}
+
 	if (mrioc->reset_in_progress || mrioc->pci_err_recovery)
 		return;
 
@@ -2842,6 +2937,10 @@ static void mpi3mr_watchdog_work(struct work_struct *work)
 		goto schedule_work;
 	}
 
+	mpi3mr_save_fault_info(mrioc);
+	mpi3mr_fault_uevent_emit(mrioc);
+	mrioc->fwfault_counter++;
+
 	switch (trigger_data.fault) {
 	case MPI3_SYSIF_FAULT_CODE_COMPLETE_RESET_NEEDED:
 	case MPI3_SYSIF_FAULT_CODE_POWER_CYCLE_REQUIRED:
@@ -5478,6 +5577,10 @@ int mpi3mr_soft_reset_handler(struct mpi3mr_ioc *mrioc,
 				break;
 			msleep(100);
 		} while (--timeout);
+
+		mpi3mr_save_fault_info(mrioc);
+		mpi3mr_fault_uevent_emit(mrioc);
+		mrioc->fwfault_counter++;
 		mpi3mr_set_trigger_data_in_all_hdb(mrioc,
 		    MPI3MR_HDB_TRIGGER_TYPE_FAULT, &trigger_data, 0);
 	}