git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
fixes for 4.9
author Sasha Levin <sashal@kernel.org>
Sun, 27 Oct 2019 09:27:22 +0000 (05:27 -0400)
committer Sasha Levin <sashal@kernel.org>
Sun, 27 Oct 2019 09:27:22 +0000 (05:27 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-4.9/scsi-zfcp-fix-reaction-on-bit-error-threshold-notifi.patch [new file with mode: 0644]
queue-4.9/series

diff --git a/queue-4.9/scsi-zfcp-fix-reaction-on-bit-error-threshold-notifi.patch b/queue-4.9/scsi-zfcp-fix-reaction-on-bit-error-threshold-notifi.patch
new file mode 100644 (file)
index 0000000..7ad93df
--- /dev/null
@@ -0,0 +1,86 @@
+From f6de552d2e83283f92ce4400e6d7f49a24380a32 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Oct 2019 12:49:49 +0200
+Subject: scsi: zfcp: fix reaction on bit error threshold notification
+
+From: Steffen Maier <maier@linux.ibm.com>
+
+[ Upstream commit 2190168aaea42c31bff7b9a967e7b045f07df095 ]
+
+On excessive bit errors for the FCP channel ingress fibre path, the channel
+notifies us.  Previously, we only emitted a kernel message and a trace
+record.  Since performance can become suboptimal with I/O timeouts due to
+bit errors, we now stop using an FCP device by default on channel
+notification so multipath on top can timely failover to other paths.  A new
+module parameter zfcp.ber_stop can be used to get zfcp old behavior.
+
+User explanation of new kernel message:
+
+ * Description:
+ * The FCP channel reported that its bit error threshold has been exceeded.
+ * These errors might result from a problem with the physical components
+ * of the local fibre link into the FCP channel.
+ * The problem might be damage or malfunction of the cable or
+ * cable connection between the FCP channel and
+ * the adjacent fabric switch port or the point-to-point peer.
+ * Find details about the errors in the HBA trace for the FCP device.
+ * The zfcp device driver closed down the FCP device
+ * to limit the performance impact from possible I/O command timeouts.
+ * User action:
+ * Check for problems on the local fibre link, ensure that fibre optics are
+ * clean and functional, and all cables are properly plugged.
+ * After the repair action, you can manually recover the FCP device by
+ * writing "0" into its "failed" sysfs attribute.
+ * If recovery through sysfs is not possible, set the CHPID of the device
+ * offline and back online on the service element.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Cc: <stable@vger.kernel.org> #2.6.30+
+Link: https://lore.kernel.org/r/20191001104949.42810-1-maier@linux.ibm.com
+Reviewed-by: Jens Remus <jremus@linux.ibm.com>
+Reviewed-by: Benjamin Block <bblock@linux.ibm.com>
+Signed-off-by: Steffen Maier <maier@linux.ibm.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/s390/scsi/zfcp_fsf.c | 16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
+index 1964391db9047..a3aaef4c53a3c 100644
+--- a/drivers/s390/scsi/zfcp_fsf.c
++++ b/drivers/s390/scsi/zfcp_fsf.c
+@@ -20,6 +20,11 @@
+ struct kmem_cache *zfcp_fsf_qtcb_cache;
++static bool ber_stop = true;
++module_param(ber_stop, bool, 0600);
++MODULE_PARM_DESC(ber_stop,
++               "Shuts down FCP devices for FCP channels that report a bit-error count in excess of its threshold (default on)");
++
+ static void zfcp_fsf_request_timeout_handler(unsigned long data)
+ {
+       struct zfcp_adapter *adapter = (struct zfcp_adapter *) data;
+@@ -231,10 +236,15 @@ static void zfcp_fsf_status_read_handler(struct zfcp_fsf_req *req)
+       case FSF_STATUS_READ_SENSE_DATA_AVAIL:
+               break;
+       case FSF_STATUS_READ_BIT_ERROR_THRESHOLD:
+-              dev_warn(&adapter->ccw_device->dev,
+-                       "The error threshold for checksum statistics "
+-                       "has been exceeded\n");
+               zfcp_dbf_hba_bit_err("fssrh_3", req);
++              if (ber_stop) {
++                      dev_warn(&adapter->ccw_device->dev,
++                               "All paths over this FCP device are disused because of excessive bit errors\n");
++                      zfcp_erp_adapter_shutdown(adapter, 0, "fssrh_b");
++              } else {
++                      dev_warn(&adapter->ccw_device->dev,
++                               "The error threshold for checksum statistics has been exceeded\n");
++              }
+               break;
+       case FSF_STATUS_READ_LINK_DOWN:
+               zfcp_fsf_status_read_link_down(req);
+-- 
+2.20.1
+
index dcc5749ec822b8c9b473db6b455410aa093209bb..047a071dc6668363782cf72b79898fa500dc93f4 100644 (file)
@@ -35,3 +35,4 @@ asoc-rsnd-reinitialize-bit-clock-inversion-flag-for-every-format-setting.patch
 cfg80211-wext-avoid-copying-malformed-ssids.patch
 mac80211-reject-malformed-ssid-elements.patch
 drm-edid-add-6-bpc-quirk-for-sdc-panel-in-lenovo-g50.patch
+scsi-zfcp-fix-reaction-on-bit-error-threshold-notifi.patch