4.4-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sat, 30 Mar 2019 10:16:03 +0000 (11:16 +0100)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sat, 30 Mar 2019 10:16:03 +0000 (11:16 +0100)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 30 Mar 2019 10:16:03 +0000 (11:16 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 30 Mar 2019 10:16:03 +0000 (11:16 +0100)
diff --git a/queue-4.4/scsi-sd-fix-a-race-between-closing-an-sd-device-and-sd-i-o.patch b/queue-4.4/scsi-sd-fix-a-race-between-closing-an-sd-device-and-sd-i-o.patch

new file mode 100644 (file)

index 0000000..d9b0761
--- /dev/null
+++ b/queue-4.4/scsi-sd-fix-a-race-between-closing-an-sd-device-and-sd-i-o.patch
@@ -0,0 +1,81 @@
+From c14a57264399efd39514a2329c591a4b954246d8 Mon Sep 17 00:00:00 2001
+From: Bart Van Assche <bvanassche@acm.org>
+Date: Mon, 25 Mar 2019 10:01:46 -0700
+Subject: scsi: sd: Fix a race between closing an sd device and sd I/O
+
+From: Bart Van Assche <bvanassche@acm.org>
+
+commit c14a57264399efd39514a2329c591a4b954246d8 upstream.
+
+The scsi_end_request() function calls scsi_cmd_to_driver() indirectly and
+hence needs the disk->private_data pointer. Avoid that that pointer is
+cleared before all affected I/O requests have finished. This patch avoids
+that the following crash occurs:
+
+Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
+Call trace:
+ scsi_mq_uninit_cmd+0x1c/0x30
+ scsi_end_request+0x7c/0x1b8
+ scsi_io_completion+0x464/0x668
+ scsi_finish_command+0xbc/0x160
+ scsi_eh_flush_done_q+0x10c/0x170
+ sas_scsi_recover_host+0x84c/0xa98 [libsas]
+ scsi_error_handler+0x140/0x5b0
+ kthread+0x100/0x12c
+ ret_from_fork+0x10/0x18
+
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Ming Lei <ming.lei@redhat.com>
+Cc: Hannes Reinecke <hare@suse.com>
+Cc: Johannes Thumshirn <jthumshirn@suse.de>
+Cc: Jason Yan <yanaijie@huawei.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Bart Van Assche <bvanassche@acm.org>
+Reported-by: Jason Yan <yanaijie@huawei.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/scsi/sd.c |   19 +++++++++++++------
+ 1 file changed, 13 insertions(+), 6 deletions(-)
+
+--- a/drivers/scsi/sd.c
++++ b/drivers/scsi/sd.c
+@@ -1276,11 +1276,6 @@ static void sd_release(struct gendisk *d
+                       scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW);
+       }
+ 
+-      /*
+-       * XXX and what if there are packets in flight and this close()
+-       * XXX is followed by a "rmmod sd_mod"?
+-       */
+-
+       scsi_disk_put(sdkp);
+ }
+ 
+@@ -3227,11 +3222,23 @@ static void scsi_disk_release(struct dev
+ {
+       struct scsi_disk *sdkp = to_scsi_disk(dev);
+       struct gendisk *disk = sdkp->disk;
+-      
++      struct request_queue *q = disk->queue;
++
+       spin_lock(&sd_index_lock);
+       ida_remove(&sd_index_ida, sdkp->index);
+       spin_unlock(&sd_index_lock);
+ 
++      /*
++       * Wait until all requests that are in progress have completed.
++       * This is necessary to avoid that e.g. scsi_end_request() crashes
++       * due to clearing the disk->private_data pointer. Wait from inside
++       * scsi_disk_release() instead of from sd_release() to avoid that
++       * freezing and unfreezing the request queue affects user space I/O
++       * in case multiple processes open a /dev/sd... node concurrently.
++       */
++      blk_mq_freeze_queue(q);
++      blk_mq_unfreeze_queue(q);
++
+       disk->private_data = NULL;
+       put_disk(disk);
+       put_device(&sdkp->device->sdev_gendev);
diff --git a/queue-4.4/scsi-zfcp-fix-rport-unblock-if-deleted-scsi-devices-on-scsi_host.patch b/queue-4.4/scsi-zfcp-fix-rport-unblock-if-deleted-scsi-devices-on-scsi_host.patch

new file mode 100644 (file)

index 0000000..1b1a171
--- /dev/null
+++ b/queue-4.4/scsi-zfcp-fix-rport-unblock-if-deleted-scsi-devices-on-scsi_host.patch
@@ -0,0 +1,83 @@
+From fe67888fc007a76b81e37da23ce5bd8fb95890b0 Mon Sep 17 00:00:00 2001
+From: Steffen Maier <maier@linux.ibm.com>
+Date: Tue, 26 Mar 2019 14:36:58 +0100
+Subject: scsi: zfcp: fix rport unblock if deleted SCSI devices on Scsi_Host
+
+From: Steffen Maier <maier@linux.ibm.com>
+
+commit fe67888fc007a76b81e37da23ce5bd8fb95890b0 upstream.
+
+An already deleted SCSI device can exist on the Scsi_Host and remain there
+because something still holds a reference.  A new SCSI device with the same
+H:C:T:L and FCP device, target port WWPN, and FCP LUN can be created.  When
+we try to unblock an rport, we still find the deleted SCSI device and
+return early because the zfcp_scsi_dev of that SCSI device is not
+ZFCP_STATUS_COMMON_UNBLOCKED. Hence we miss to unblock the rport, even if
+the new proper SCSI device would be in good state.
+
+Therefore, skip deleted SCSI devices when iterating the sdevs of the shost.
+[cf. __scsi_device_lookup{_by_target}() or scsi_device_get()]
+
+The following abbreviated trace sequence can indicate such problem:
+
+Area           : REC
+Tag            : ersfs_3
+LUN            : 0x4045400300000000
+WWPN           : 0x50050763031bd327
+LUN status     : 0x40000000     not ZFCP_STATUS_COMMON_UNBLOCKED
+Ready count    : n             not incremented yet
+Running count  : 0x00000000
+ERP want       : 0x01
+ERP need       : 0xc1          ZFCP_ERP_ACTION_NONE
+
+Area           : REC
+Tag            : ersfs_3
+LUN            : 0x4045400300000000
+WWPN           : 0x50050763031bd327
+LUN status     : 0x41000000
+Ready count    : n+1
+Running count  : 0x00000000
+ERP want       : 0x01
+ERP need       : 0x01
+
+...
+
+Area           : REC
+Level          : 4             only with increased trace level
+Tag            : ertru_l
+LUN            : 0x4045400300000000
+WWPN           : 0x50050763031bd327
+LUN status     : 0x40000000
+Request ID     : 0x0000000000000000
+ERP status     : 0x01800000
+ERP step       : 0x1000
+ERP action     : 0x01
+ERP count      : 0x00
+
+NOT followed by a trace record with tag "scpaddy"
+for WWPN 0x50050763031bd327.
+
+Signed-off-by: Steffen Maier <maier@linux.ibm.com>
+Fixes: 6f2ce1c6af37 ("scsi: zfcp: fix rport unblock race with LUN recovery")
+Cc: <stable@vger.kernel.org> #2.6.32+
+Reviewed-by: Jens Remus <jremus@linux.ibm.com>
+Reviewed-by: Benjamin Block <bblock@linux.ibm.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/s390/scsi/zfcp_erp.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/s390/scsi/zfcp_erp.c
++++ b/drivers/s390/scsi/zfcp_erp.c
+@@ -1306,6 +1306,9 @@ static void zfcp_erp_try_rport_unblock(s
+               struct zfcp_scsi_dev *zsdev = sdev_to_zfcp(sdev);
+               int lun_status;
+ 
++              if (sdev->sdev_state == SDEV_DEL ||
++                  sdev->sdev_state == SDEV_CANCEL)
++                      continue;
+               if (zsdev->port != port)
+                       continue;
+               /* LUN under port of interest */
diff --git a/queue-4.4/scsi-zfcp-fix-scsi_eh-host-reset-with-port_forced-erp-for-non-npiv-fcp-devices.patch b/queue-4.4/scsi-zfcp-fix-scsi_eh-host-reset-with-port_forced-erp-for-non-npiv-fcp-devices.patch

new file mode 100644 (file)

index 0000000..3359d18
--- /dev/null
+++ b/queue-4.4/scsi-zfcp-fix-scsi_eh-host-reset-with-port_forced-erp-for-non-npiv-fcp-devices.patch
@@ -0,0 +1,94 @@
+From 242ec1455151267fe35a0834aa9038e4c4670884 Mon Sep 17 00:00:00 2001
+From: Steffen Maier <maier@linux.ibm.com>
+Date: Tue, 26 Mar 2019 14:36:59 +0100
+Subject: scsi: zfcp: fix scsi_eh host reset with port_forced ERP for non-NPIV FCP devices
+
+From: Steffen Maier <maier@linux.ibm.com>
+
+commit 242ec1455151267fe35a0834aa9038e4c4670884 upstream.
+
+Suppose more than one non-NPIV FCP device is active on the same channel.
+Send I/O to storage and have some of the pending I/O run into a SCSI
+command timeout, e.g. due to bit errors on the fibre. Now the error
+situation stops. However, we saw FCP requests continue to timeout in the
+channel. The abort will be successful, but the subsequent TUR fails.
+Scsi_eh starts. The LUN reset fails. The target reset fails.  The host
+reset only did an FCP device recovery. However, for non-NPIV FCP devices,
+this does not close and reopen ports on the SAN-side if other non-NPIV FCP
+device(s) share the same open ports.
+
+In order to resolve the continuing FCP request timeouts, we need to
+explicitly close and reopen ports on the SAN-side.
+
+This was missing since the beginning of zfcp in v2.6.0 history commit
+ea127f975424 ("[PATCH] s390 (7/7): zfcp host adapter.").
+
+Note: The FSF requests for forced port reopen could run into FSF request
+timeouts due to other reasons. This would trigger an internal FCP device
+recovery. Pending forced port reopen recoveries would get dismissed. So
+some ports might not get fully reopened during this host reset handler.
+However, subsequent I/O would trigger the above described escalation and
+eventually all ports would be forced reopen to resolve any continuing FCP
+request timeouts due to earlier bit errors.
+
+Signed-off-by: Steffen Maier <maier@linux.ibm.com>
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Cc: <stable@vger.kernel.org> #3.0+
+Reviewed-by: Jens Remus <jremus@linux.ibm.com>
+Reviewed-by: Benjamin Block <bblock@linux.ibm.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/s390/scsi/zfcp_erp.c  |   14 ++++++++++++++
+ drivers/s390/scsi/zfcp_ext.h  |    2 ++
+ drivers/s390/scsi/zfcp_scsi.c |    4 ++++
+ 3 files changed, 20 insertions(+)
+
+--- a/drivers/s390/scsi/zfcp_erp.c
++++ b/drivers/s390/scsi/zfcp_erp.c
+@@ -652,6 +652,20 @@ static void zfcp_erp_strategy_memwait(st
+       add_timer(&erp_action->timer);
+ }
+ 
++void zfcp_erp_port_forced_reopen_all(struct zfcp_adapter *adapter,
++                                   int clear, char *dbftag)
++{
++      unsigned long flags;
++      struct zfcp_port *port;
++
++      write_lock_irqsave(&adapter->erp_lock, flags);
++      read_lock(&adapter->port_list_lock);
++      list_for_each_entry(port, &adapter->port_list, list)
++              _zfcp_erp_port_forced_reopen(port, clear, dbftag);
++      read_unlock(&adapter->port_list_lock);
++      write_unlock_irqrestore(&adapter->erp_lock, flags);
++}
++
+ static void _zfcp_erp_port_reopen_all(struct zfcp_adapter *adapter,
+                                     int clear, char *id)
+ {
+--- a/drivers/s390/scsi/zfcp_ext.h
++++ b/drivers/s390/scsi/zfcp_ext.h
+@@ -68,6 +68,8 @@ extern void zfcp_erp_clear_port_status(s
+ extern int  zfcp_erp_port_reopen(struct zfcp_port *, int, char *);
+ extern void zfcp_erp_port_shutdown(struct zfcp_port *, int, char *);
+ extern void zfcp_erp_port_forced_reopen(struct zfcp_port *, int, char *);
++extern void zfcp_erp_port_forced_reopen_all(struct zfcp_adapter *adapter,
++                                          int clear, char *dbftag);
+ extern void zfcp_erp_set_lun_status(struct scsi_device *, u32);
+ extern void zfcp_erp_clear_lun_status(struct scsi_device *, u32);
+ extern void zfcp_erp_lun_reopen(struct scsi_device *, int, char *);
+--- a/drivers/s390/scsi/zfcp_scsi.c
++++ b/drivers/s390/scsi/zfcp_scsi.c
+@@ -326,6 +326,10 @@ static int zfcp_scsi_eh_host_reset_handl
+       struct zfcp_adapter *adapter = zfcp_sdev->port->adapter;
+       int ret = SUCCESS, fc_ret;
+ 
++      if (!(adapter->connection_features & FSF_FEATURE_NPIV_MODE)) {
++              zfcp_erp_port_forced_reopen_all(adapter, 0, "schrh_p");
++              zfcp_erp_wait(adapter);
++      }
+       zfcp_erp_adapter_reopen(adapter, 0, "schrh_1");
+       zfcp_erp_wait(adapter);
+       fc_ret = fc_block_scsi_eh(scpnt);
diff --git a/queue-4.4/series b/queue-4.4/series

index 0d333e0a0c620f55ae7687cbffac9903743d2b65..60779ffa5cf77c46aad24e116cf57dfdd2410209 100644 (file)
--- a/queue-4.4/series
+++ b/queue-4.4/series
@@ -105,3 +105,6 @@ alsa-rawmidi-fix-potential-spectre-v1-vulnerability.patch
  alsa-seq-oss-fix-spectre-v1-vulnerability.patch
  alsa-pcm-fix-possible-oob-access-in-pcm-oss-plugins.patch
  alsa-pcm-don-t-suspend-stream-in-unrecoverable-pcm-state.patch
+scsi-sd-fix-a-race-between-closing-an-sd-device-and-sd-i-o.patch
+scsi-zfcp-fix-rport-unblock-if-deleted-scsi-devices-on-scsi_host.patch
+scsi-zfcp-fix-scsi_eh-host-reset-with-port_forced-erp-for-non-npiv-fcp-devices.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sat, 30 Mar 2019 10:16:03 +0000 (11:16 +0100)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sat, 30 Mar 2019 10:16:03 +0000 (11:16 +0100)
queue-4.4/scsi-sd-fix-a-race-between-closing-an-sd-device-and-sd-i-o.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/scsi-zfcp-fix-rport-unblock-if-deleted-scsi-devices-on-scsi_host.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/scsi-zfcp-fix-scsi_eh-host-reset-with-port_forced-erp-for-non-npiv-fcp-devices.patch	[new file with mode: 0644]	patch \| blob
queue-4.4/series		patch \| blob \| blame \| history