scsi: fix race between simultaneous decrements of ->host_failed

author Wei Fang <fangwei1@huawei.com>

Tue, 7 Jun 2016 06:53:56 +0000 (14:53 +0800)

committer Ben Hutchings <ben@decadent.org.uk>

Mon, 22 Aug 2016 21:38:13 +0000 (22:38 +0100)
author Wei Fang <fangwei1@huawei.com>
Tue, 7 Jun 2016 06:53:56 +0000 (14:53 +0800)
committer Ben Hutchings <ben@decadent.org.uk>
Mon, 22 Aug 2016 21:38:13 +0000 (22:38 +0100)
diff --git a/Documentation/scsi/scsi_eh.txt b/Documentation/scsi/scsi_eh.txt

index a0c85110a07ef7be95511ae13fa982b5692e1ca1..689ab9b9953a603b4b9e686c8aa22d9cc87842f7 100644 (file)
--- a/Documentation/scsi/scsi_eh.txt
+++ b/Documentation/scsi/scsi_eh.txt
@@ -263,19 +263,23 @@ scmd->allowed.
  
   3. scmd recovered
      ACTION: scsi_eh_finish_cmd() is invoked to EH-finish scmd
-       - shost->host_failed--
         - clear scmd->eh_eflags
         - scsi_setup_cmd_retry()
         - move from local eh_work_q to local eh_done_q
      LOCKING: none
+    CONCURRENCY: at most one thread per separate eh_work_q to
+                keep queue manipulation lockless
  
   4. EH completes
      ACTION: scsi_eh_flush_done_q() retries scmds or notifies upper
-           layer of failure.
+           layer of failure. May be called concurrently but must have
+           no more than one thread per separate eh_work_q to
+           manipulate the queue locklessly
         - scmd is removed from eh_done_q and scmd->eh_entry is cleared
         - if retry is necessary, scmd is requeued using
            scsi_queue_insert()
         - otherwise, scsi_finish_command() is invoked for scmd
+       - zero shost->host_failed
      LOCKING: queue or finish function performs appropriate locking
  
  
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c

index 4ec95b76f6a1c655705b9cd2364c4ad2bf3ecf6a..0550c76f4e6c9025bc91af48dc996198f487ba1c 100644 (file)
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -605,7 +605,7 @@ void ata_scsi_error(struct Scsi_Host *host)
         ata_scsi_port_error_handler(host, ap);
  
         /* finish or retry handled scmd's and clean up */
-       WARN_ON(host->host_failed || !list_empty(&eh_work_q));
+       WARN_ON(!list_empty(&eh_work_q));
  
         DPRINTK("EXIT\n");
  }
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c

index 48790f1dbf79285cd0561e9101695b88c0d594fd..07ab11da83a0998de9722eb5404289a0fa7ed9f2 100644 (file)
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1115,7 +1115,6 @@ static int scsi_eh_action(struct scsi_cmnd *scmd, int rtn)
   */
  void scsi_eh_finish_cmd(struct scsi_cmnd *scmd, struct list_head *done_q)
  {
-       scmd->device->host->host_failed--;
         scmd->eh_eflags = 0;
         list_move_tail(&scmd->eh_entry, done_q);
  }
@@ -2198,6 +2197,9 @@ int scsi_error_handler(void *data)
                 else
                         scsi_unjam_host(shost);
  
+               /* All scmds have been handled */
+               shost->host_failed = 0;
+
                 /*
                  * Note - if the above fails completely, the action is to take
                  * individual devices offline and flush the queue of any
author	Wei Fang <fangwei1@huawei.com>
	Tue, 7 Jun 2016 06:53:56 +0000 (14:53 +0800)
committer	Ben Hutchings <ben@decadent.org.uk>
	Mon, 22 Aug 2016 21:38:13 +0000 (22:38 +0100)
Documentation/scsi/scsi_eh.txt		patch \| blob \| blame \| history
drivers/ata/libata-eh.c		patch \| blob \| blame \| history
drivers/scsi/scsi_error.c		patch \| blob \| blame \| history