git.ipfire.org Git - thirdparty/linux.git/commitdiff
scsi: mpi3mr: Reset controller on invalid I/O completion
author: Ranjan Kumar <ranjan.kumar@broadcom.com>
Fri, 20 Mar 2026 09:03:24 +0000 (14:33 +0530)
committer: Martin K. Petersen <martin.petersen@oracle.com>
Fri, 27 Mar 2026 21:07:35 +0000 (17:07 -0400)
Operational replies without a valid scsi_cmnd indicate an invalid I/O
completion and a potentially inconsistent controller state.  Track this
condition and allow the watchdog to trigger a soft reset to safely
recover.

Signed-off-by: Ranjan Kumar <ranjan.kumar@broadcom.com>
Link: https://patch.msgid.link/20260320090326.47544-2-ranjan.kumar@broadcom.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/mpi3mr/mpi3mr.h
drivers/scsi/mpi3mr/mpi3mr_fw.c
drivers/scsi/mpi3mr/mpi3mr_os.c

index 6e962092577dc6bd27a6d81250c4978e82b73f37..da141c185eef4f3ac9c8909aadd8f06cd043ccac 100644 (file)
@@ -323,6 +323,7 @@ enum mpi3mr_reset_reason {
        MPI3MR_RESET_FROM_CFG_REQ_TIMEOUT = 29,
        MPI3MR_RESET_FROM_SAS_TRANSPORT_TIMEOUT = 30,
        MPI3MR_RESET_FROM_TRIGGER = 31,
+       MPI3MR_RESET_FROM_INVALID_COMPLETION = 32,
 };
 
 #define MPI3MR_RESET_REASON_OSTYPE_LINUX       1
@@ -1183,6 +1184,7 @@ struct scmd_priv {
  * @num_tb_segs: Number of Segments in Trace buffer
  * @trace_buf_pool: DMA pool for Segmented trace buffer segments
  * @trace_buf: Trace buffer segments memory descriptor
+ * @invalid_io_comp: Invalid IO completion
  */
 struct mpi3mr_ioc {
        struct list_head list;
@@ -1394,6 +1396,7 @@ struct mpi3mr_ioc {
        u32 num_tb_segs;
        struct dma_pool *trace_buf_pool;
        struct segments *trace_buf;
+       u8 invalid_io_comp;
 
 };
 
index 81150bef11457eb91b2b1f740a48b61088b6a4bc..b1cd7e4cf40ebcb1fe2e9f69f91d1e5987ccab22 100644 (file)
@@ -996,6 +996,7 @@ static const struct {
        { MPI3MR_RESET_FROM_FIRMWARE, "firmware asynchronous reset" },
        { MPI3MR_RESET_FROM_CFG_REQ_TIMEOUT, "configuration request timeout"},
        { MPI3MR_RESET_FROM_SAS_TRANSPORT_TIMEOUT, "timeout of a SAS transport layer request" },
+       { MPI3MR_RESET_FROM_INVALID_COMPLETION, "invalid cmd completion" },
 };
 
 /**
@@ -2879,6 +2880,11 @@ static void mpi3mr_watchdog_work(struct work_struct *work)
                return;
        }
 
+       if (mrioc->invalid_io_comp) {
+               mpi3mr_soft_reset_handler(mrioc, MPI3MR_RESET_FROM_INVALID_COMPLETION, 1);
+               return;
+       }
+
        if (atomic_read(&mrioc->admin_pend_isr)) {
                ioc_err(mrioc, "Unprocessed admin ISR instance found\n"
                                "flush admin replies\n");
@@ -5644,6 +5650,7 @@ int mpi3mr_soft_reset_handler(struct mpi3mr_ioc *mrioc,
        ssleep(MPI3MR_RESET_TOPOLOGY_SETTLE_TIME);
 
 out:
+       mrioc->invalid_io_comp = 0;
        if (!retval) {
                mrioc->diagsave_timeout = 0;
                mrioc->reset_in_progress = 0;
index 90f8b9d1c2ac86ba9003b03c6b75e451e7ed6d7f..402d1f35d214233175965d8976d3db5dd4d08a07 100644 (file)
@@ -3459,8 +3459,15 @@ void mpi3mr_process_op_reply_desc(struct mpi3mr_ioc *mrioc,
        }
        scmd = mpi3mr_scmd_from_host_tag(mrioc, host_tag, qidx);
        if (!scmd) {
-               panic("%s: Cannot Identify scmd for host_tag 0x%x\n",
-                   mrioc->name, host_tag);
+               ioc_err(mrioc, "Cannot Identify scmd for host_tag 0x%x", host_tag);
+               ioc_err(mrioc,
+                   "reply_desc_type(%d) host_tag(%d(0x%04x)): qid(%d): command issued to\n"
+                   "handle(0x%04x) returned with ioc_status(0x%04x), log_info(0x%08x),\n"
+                   "scsi_state(0x%02x), scsi_status(0x%02x), xfer_count(%d), resp_data(0x%08x)\n",
+                   reply_desc_type, host_tag, host_tag, qidx+1, dev_handle, ioc_status,
+                   ioc_loginfo, scsi_state, scsi_status,  xfer_count,
+                   resp_data);
+               mrioc->invalid_io_comp = 1;
                goto out;
        }
        priv = scsi_cmd_priv(scmd);