scsi: core: Wake up the error handler when final completions race against each other

author David Jeffery <djeffery@redhat.com>

Tue, 13 Jan 2026 16:08:13 +0000 (11:08 -0500)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Fri, 30 Jan 2026 09:27:36 +0000 (10:27 +0100)
author David Jeffery <djeffery@redhat.com>
Tue, 13 Jan 2026 16:08:13 +0000 (11:08 -0500)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 30 Jan 2026 09:27:36 +0000 (10:27 +0100)
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c

index 5882b5f80c0497e445c84279cfc852e6c0824e32..c4c05cf38aa55f22a272d7f5ec1978040c75525e 100644 (file)
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -282,11 +282,20 @@ static void scsi_eh_inc_host_failed(struct rcu_head *head)
  {
         struct scsi_cmnd *scmd = container_of(head, typeof(*scmd), rcu);
         struct Scsi_Host *shost = scmd->device->host;
-       unsigned int busy = scsi_host_busy(shost);
+       unsigned int busy;
         unsigned long flags;
  
         spin_lock_irqsave(shost->host_lock, flags);
         shost->host_failed++;
+       spin_unlock_irqrestore(shost->host_lock, flags);
+       /*
+        * The counting of busy requests needs to occur after adding to
+        * host_failed or after the lock acquire for adding to host_failed
+        * to prevent a race with host unbusy and missing an eh wakeup.
+        */
+       busy = scsi_host_busy(shost);
+
+       spin_lock_irqsave(shost->host_lock, flags);
         scsi_eh_wakeup(shost, busy);
         spin_unlock_irqrestore(shost->host_lock, flags);
  }
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c

index bd75e3ebc14da399c964e54d7a9507ea8c423c8f..efd1f1d6e4e9b0384e218423fdba60ac9b70984b 100644 (file)
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -278,6 +278,14 @@ static void scsi_dec_host_busy(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
         rcu_read_lock();
         __clear_bit(SCMD_STATE_INFLIGHT, &cmd->state);
         if (unlikely(scsi_host_in_recovery(shost))) {
+               /*
+                * Ensure the clear of SCMD_STATE_INFLIGHT is visible to
+                * other CPUs before counting busy requests. Otherwise,
+                * reordering can cause CPUs to race and miss an eh wakeup
+                * when no CPU sees all busy requests as done or timed out.
+                */
+               smp_mb();
+
                 unsigned int busy = scsi_host_busy(shost);
  
                 spin_lock_irqsave(shost->host_lock, flags);
author	David Jeffery <djeffery@redhat.com>
	Tue, 13 Jan 2026 16:08:13 +0000 (11:08 -0500)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Fri, 30 Jan 2026 09:27:36 +0000 (10:27 +0100)
drivers/scsi/scsi_error.c		patch | blob | blame | history
drivers/scsi/scsi_lib.c		patch | blob | blame | history