git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
scsi: core: Run queues for all non-SDEV_DEL devices from scsi_run_host_queues
author David Jeffery <djeffery@redhat.com>
Fri, 15 May 2026 18:09:41 +0000 (14:09 -0400)
committer Martin K. Petersen <martin.petersen@oracle.com>
Sat, 23 May 2026 01:09:23 +0000 (21:09 -0400)
While a SCSI host is in a recovery state, scsi_mq_requeue_cmd() will not
set the requeue list for a requeued command to be kicked in the future.
The expectation is a call to scsi_run_host_queues() will kick all SCSI
devices once the recovery state is cleared.

However, scsi_run_host_queues() uses shost_for_each_device() which uses
scsi_device_get() and so will ignore devices in a partially removed
state like SDEV_CANCEL. But these devices may also have requeued
requests, which are then never kicked; they remain stuck and cause the
removal process of the device to hang.

scsi_run_host_queues() needs to run against more devices than the macro
shost_for_each_device() allows. Instead of using the too limiting
scsi_device_get() state checks, only ignore devices in SDEV_DEL state or
when unable to acquire a reference. Attempt to run the queues for all
other devices when scsi_run_host_queues() is called.

Fixes: 8b566edbdbfb ("scsi: core: Only kick the requeue list if necessary")
Signed-off-by: David Jeffery <djeffery@redhat.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://patch.msgid.link/20260515180941.9698-1-djeffery@redhat.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/scsi_lib.c

index 6e8c7a42603e5b13deeaa487f62c4bec71fcc106..85eef401925a27d334e65cf9fc8da5a4e4bb5ce9 100644 (file)
@@ -575,10 +575,33 @@ void scsi_requeue_run_queue(struct work_struct *work)
 
 void scsi_run_host_queues(struct Scsi_Host *shost)
 {
-       struct scsi_device *sdev;
+       struct scsi_device *sdev, *prev = NULL;
+       unsigned long flags;
 
-       shost_for_each_device(sdev, shost)
+       spin_lock_irqsave(shost->host_lock, flags);
+       __shost_for_each_device(sdev, shost) {
+               /*
+                * Only skip devices so deep into removal they will never need
+                * another kick to their queues. Thus scsi_device_get() cannot
+                * be used as it would skip devices in SDEV_CANCEL state which
+                * may need a queue kick.
+                */
+               if (sdev->sdev_state == SDEV_DEL ||
+                   !get_device(&sdev->sdev_gendev))
+                       continue;
+               spin_unlock_irqrestore(shost->host_lock, flags);
+
+               if (prev)
+                       put_device(&prev->sdev_gendev);
                scsi_run_queue(sdev->request_queue);
+
+               prev = sdev;
+
+               spin_lock_irqsave(shost->host_lock, flags);
+       }
+       spin_unlock_irqrestore(shost->host_lock, flags);
+       if (prev)
+               put_device(&prev->sdev_gendev);
 }
 
 static void scsi_uninit_cmd(struct scsi_cmnd *cmd)