scsi: core: Wake up the error handler when final completions race against each other

author David Jeffery <djeffery@redhat.com>

Tue, 13 Jan 2026 16:08:13 +0000 (11:08 -0500)

committer Martin K. Petersen <martin.petersen@oracle.com>

Sat, 17 Jan 2026 03:54:27 +0000 (22:54 -0500)
author David Jeffery <djeffery@redhat.com>
Tue, 13 Jan 2026 16:08:13 +0000 (11:08 -0500)
committer Martin K. Petersen <martin.petersen@oracle.com>
Sat, 17 Jan 2026 03:54:27 +0000 (22:54 -0500)
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c

index eebca96c1fc1560b1439a87a67cefa0048bb3fe2..b6e8730e049eb621dcb87e30abd106533c16e5b4 100644 (file)
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -282,11 +282,20 @@ static void scsi_eh_inc_host_failed(struct rcu_head *head)
  {
         struct scsi_cmnd *scmd = container_of(head, typeof(*scmd), rcu);
         struct Scsi_Host *shost = scmd->device->host;
-       unsigned int busy = scsi_host_busy(shost);
+       unsigned int busy;
         unsigned long flags;
  
         spin_lock_irqsave(shost->host_lock, flags);
         shost->host_failed++;
+       spin_unlock_irqrestore(shost->host_lock, flags);
+       /*
+        * Count busy requests only after host_failed has been incremented,
+        * or at least after the lock protecting that increment was taken.
+        * Otherwise this can race with host unbusy and miss an eh wakeup.
+        */
+       busy = scsi_host_busy(shost);
+
+       spin_lock_irqsave(shost->host_lock, flags);
         scsi_eh_wakeup(shost, busy);
         spin_unlock_irqrestore(shost->host_lock, flags);
  }
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c

index c7d6b76c86d24c79dacfc1c32d0a3a232316f04e..4a902c9dfd8b44e269ee62503211e34e491b13c0 100644 (file)
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -376,6 +376,14 @@ static void scsi_dec_host_busy(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
         rcu_read_lock();
         __clear_bit(SCMD_STATE_INFLIGHT, &cmd->state);
         if (unlikely(scsi_host_in_recovery(shost))) {
+               /*
+                * Ensure the clear of SCMD_STATE_INFLIGHT is visible to
+                * other CPUs before counting busy requests. Otherwise,
+                * reordering can cause CPUs to race and miss an eh wakeup
+                * when no CPU sees all busy requests as done or timed out.
+                */
+               smp_mb();
+
                 unsigned int busy = scsi_host_busy(shost);
  
                 spin_lock_irqsave(shost->host_lock, flags);
author	David Jeffery <djeffery@redhat.com>
	Tue, 13 Jan 2026 16:08:13 +0000 (11:08 -0500)
committer	Martin K. Petersen <martin.petersen@oracle.com>
	Sat, 17 Jan 2026 03:54:27 +0000 (22:54 -0500)
drivers/scsi/scsi_error.c		patch \| blob \| blame \| history
drivers/scsi/scsi_lib.c		patch \| blob \| blame \| history