git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
scsi: core: alua: I/O errors for ALUA state transitions
author: Martin Wilck <martin.wilck@suse.com>
Tue, 14 May 2024 14:03:44 +0000 (16:03 +0200)
committer: Martin K. Petersen <martin.petersen@oracle.com>
Wed, 15 May 2024 13:46:13 +0000 (09:46 -0400)
When a host is configured with a few LUNs and I/O is running, injecting FC
faults repeatedly leads to path recovery problems.  The LUNs have 4 paths
each; after an FC fault that takes 2 of the paths down, only 3 of the paths
come back active instead of all 4. This happens after several iterations
of continuous FC faults.

The reason is that we return an I/O error whenever we encounter sense
code 06/04/0a (LOGICAL UNIT NOT ACCESSIBLE, ASYMMETRIC ACCESS STATE
TRANSITION) instead of retrying.

[mwilck: The original patch was developed by Rajashekhar M A and Hannes
Reinecke. I moved the code to alua_check_sense() as suggested by Mike
Christie [1]. Evan Milne had raised the question whether pg->state should
be set to transitioning in the UA case [2]. I believe that doing this is
correct. SCSI_ACCESS_STATE_TRANSITIONING by itself doesn't cause I/O
errors. Our handler schedules an RTPG, which will only result in an I/O
error condition if the transitioning timeout expires.]

[1] https://lore.kernel.org/all/0bc96e82-fdda-4187-148d-5b34f81d4942@oracle.com/
[2] https://lore.kernel.org/all/CAGtn9r=kicnTDE2o7Gt5Y=yoidHYD7tG8XdMHEBJTBraVEoOCw@mail.gmail.com/

Co-developed-by: Rajashekhar M A <rajs@netapp.com>
Co-developed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Martin Wilck <martin.wilck@suse.com>
Link: https://lore.kernel.org/r/20240514140344.19538-1-mwilck@suse.com
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/device_handler/scsi_dh_alua.c

index a226dc1b65d715f03addcf638fa52510360b1272..4eb0837298d4d2dddf159fbdfc348a6e5a8386c6 100644 (file)
@@ -414,28 +414,40 @@ static char print_alua_state(unsigned char state)
        }
 }
 
-static enum scsi_disposition alua_check_sense(struct scsi_device *sdev,
-                                             struct scsi_sense_hdr *sense_hdr)
+static void alua_handle_state_transition(struct scsi_device *sdev)
 {
        struct alua_dh_data *h = sdev->handler_data;
        struct alua_port_group *pg;
 
+       rcu_read_lock();
+       pg = rcu_dereference(h->pg);
+       if (pg)
+               pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
+       rcu_read_unlock();
+       alua_check(sdev, false);
+}
+
+static enum scsi_disposition alua_check_sense(struct scsi_device *sdev,
+                                             struct scsi_sense_hdr *sense_hdr)
+{
        switch (sense_hdr->sense_key) {
        case NOT_READY:
                if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
                        /*
                         * LUN Not Accessible - ALUA state transition
                         */
-                       rcu_read_lock();
-                       pg = rcu_dereference(h->pg);
-                       if (pg)
-                               pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
-                       rcu_read_unlock();
-                       alua_check(sdev, false);
+                       alua_handle_state_transition(sdev);
                        return NEEDS_RETRY;
                }
                break;
        case UNIT_ATTENTION:
+               if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
+                       /*
+                        * LUN Not Accessible - ALUA state transition
+                        */
+                       alua_handle_state_transition(sdev);
+                       return NEEDS_RETRY;
+               }
                if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) {
                        /*
                         * Power On, Reset, or Bus Device Reset.
@@ -502,7 +514,8 @@ static int alua_tur(struct scsi_device *sdev)
 
        retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ,
                                      ALUA_FAILOVER_RETRIES, &sense_hdr);
-       if (sense_hdr.sense_key == NOT_READY &&
+       if ((sense_hdr.sense_key == NOT_READY ||
+            sense_hdr.sense_key == UNIT_ATTENTION) &&
            sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
                return SCSI_DH_RETRY;
        else if (retval)