--- /dev/null
+From a099b7b1fc1f0418ab8d79ecf98153e1e134656e Mon Sep 17 00:00:00 2001
+From: Benjamin Block <bblock@linux.vnet.ibm.com>
+Date: Fri, 28 Jul 2017 12:30:52 +0200
+Subject: scsi: zfcp: add handling for FCP_RESID_OVER to the fcp ingress path
+
+From: Benjamin Block <bblock@linux.vnet.ibm.com>
+
+commit a099b7b1fc1f0418ab8d79ecf98153e1e134656e upstream.
+
+Up until now zfcp would just ignore the FCP_RESID_OVER flag in the FCP
+response IU. When this flag is set, it is possible, in regards to the
+FCP standard, that the storage-server processes the command normally, up
+to the point where data is missing and simply ignores those.
+
+In this case no CHECK CONDITION would be set, and because we ignored the
+FCP_RESID_OVER flag we resulted in at least a data loss or even
+-corruption as a follow-up error, depending on how the
+applications/layers on top behave. To prevent this, we now set the
+host-byte of the corresponding scsi_cmnd to DID_ERROR.
+
+Other storage-behaviors, where the same condition results in a CHECK
+CONDITION set in the answer, don't need to be changed as they are
+handled in the mid-layer already.
+
+Following is an example trace record decoded with zfcpdbf from the
+s390-tools package. We forcefully injected a fc_dl which is one byte too
+small:
+
+Timestamp : ...
+Area : SCSI
+Subarea : 00
+Level : 3
+Exception : -
+CPU ID : ..
+Caller : 0x...
+Record ID : 1
+Tag : rsl_err
+Request ID : 0x...
+SCSI ID : 0x...
+SCSI LUN : 0x...
+SCSI result : 0x00070000
+ ^^DID_ERROR
+SCSI retries : 0x..
+SCSI allowed : 0x..
+SCSI scribble : 0x...
+SCSI opcode : 2a000000 00000000 08000000 00000000
+FCP rsp inf cod: 0x00
+FCP rsp IU : 00000000 00000000 00000400 00000001
+ ^^fr_flags==FCP_RESID_OVER
+ ^^fr_status==SAM_STAT_GOOD
+ ^^^^^^^^fr_resid
+ 00000000 00000000
+
+As of now, we don't actively handle to possibility that a response IU
+has both flags - FCP_RESID_OVER and FCP_RESID_UNDER - set at once.
+
+Reported-by: Luke M. Hopkins <lmhopkin@us.ibm.com>
+Reviewed-by: Steffen Maier <maier@linux.vnet.ibm.com>
+Fixes: 553448f6c483 ("[SCSI] zfcp: Message cleanup")
+Fixes: ea127f975424 ("[PATCH] s390 (7/7): zfcp host adapter.") (tglx/history.git)
+Signed-off-by: Benjamin Block <bblock@linux.vnet.ibm.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/s390/scsi/zfcp_fc.h | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/s390/scsi/zfcp_fc.h
++++ b/drivers/s390/scsi/zfcp_fc.h
+@@ -4,7 +4,7 @@
+ * Fibre Channel related definitions and inline functions for the zfcp
+ * device driver
+ *
+- * Copyright IBM Corp. 2009
++ * Copyright IBM Corp. 2009, 2017
+ */
+
+ #ifndef ZFCP_FC_H
+@@ -291,6 +291,10 @@ void zfcp_fc_eval_fcp_rsp(struct fcp_res
+ !(rsp_flags & FCP_SNS_LEN_VAL) &&
+ fcp_rsp->resp.fr_status == SAM_STAT_GOOD)
+ set_host_byte(scsi, DID_ERROR);
++ } else if (unlikely(rsp_flags & FCP_RESID_OVER)) {
++ /* FCP_DL was not sufficient for SCSI data length */
++ if (fcp_rsp->resp.fr_status == SAM_STAT_GOOD)
++ set_host_byte(scsi, DID_ERROR);
+ }
+ }
+
--- /dev/null
+From 1a5d999ebfc7bfe28deb48931bb57faa8e4102b6 Mon Sep 17 00:00:00 2001
+From: Steffen Maier <maier@linux.vnet.ibm.com>
+Date: Fri, 28 Jul 2017 12:30:55 +0200
+Subject: scsi: zfcp: fix missing trace records for early returns in TMF eh handlers
+
+From: Steffen Maier <maier@linux.vnet.ibm.com>
+
+commit 1a5d999ebfc7bfe28deb48931bb57faa8e4102b6 upstream.
+
+For problem determination we need to see that we were in scsi_eh
+as well as whether and why we were successful or not.
+
+The following commits introduced new early returns without adding
+a trace record:
+
+v2.6.35 commit a1dbfddd02d2
+("[SCSI] zfcp: Pass return code from fc_block_scsi_eh to scsi eh")
+on fc_block_scsi_eh() returning != 0 which is FAST_IO_FAIL,
+
+v2.6.30 commit 63caf367e1c9
+("[SCSI] zfcp: Improve reliability of SCSI eh handlers in zfcp")
+on not having gotten an FSF request after the maximum number of retry
+attempts and thus could not issue a TMF and has to return FAILED.
+
+Signed-off-by: Steffen Maier <maier@linux.vnet.ibm.com>
+Fixes: a1dbfddd02d2 ("[SCSI] zfcp: Pass return code from fc_block_scsi_eh to scsi eh")
+Fixes: 63caf367e1c9 ("[SCSI] zfcp: Improve reliability of SCSI eh handlers in zfcp")
+Reviewed-by: Benjamin Block <bblock@linux.vnet.ibm.com>
+Signed-off-by: Benjamin Block <bblock@linux.vnet.ibm.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/s390/scsi/zfcp_scsi.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/drivers/s390/scsi/zfcp_scsi.c
++++ b/drivers/s390/scsi/zfcp_scsi.c
+@@ -245,8 +245,10 @@ static int zfcp_task_mgmt_function(struc
+
+ zfcp_erp_wait(adapter);
+ ret = fc_block_scsi_eh(scpnt);
+- if (ret)
++ if (ret) {
++ zfcp_dbf_scsi_devreset("fiof", scpnt, tm_flags, NULL);
+ return ret;
++ }
+
+ if (!(atomic_read(&adapter->status) &
+ ZFCP_STATUS_COMMON_RUNNING)) {
+@@ -254,8 +256,10 @@ static int zfcp_task_mgmt_function(struc
+ return SUCCESS;
+ }
+ }
+- if (!fsf_req)
++ if (!fsf_req) {
++ zfcp_dbf_scsi_devreset("reqf", scpnt, tm_flags, NULL);
+ return FAILED;
++ }
+
+ wait_for_completion(&fsf_req->completion);
+
--- /dev/null
+From 12c3e5754c8022a4f2fd1e9f00d19e99ee0d3cc1 Mon Sep 17 00:00:00 2001
+From: Steffen Maier <maier@linux.vnet.ibm.com>
+Date: Fri, 28 Jul 2017 12:30:56 +0200
+Subject: scsi: zfcp: fix payload with full FCP_RSP IU in SCSI trace records
+
+From: Steffen Maier <maier@linux.vnet.ibm.com>
+
+commit 12c3e5754c8022a4f2fd1e9f00d19e99ee0d3cc1 upstream.
+
+If the FCP_RSP UI has optional parts (FCP_SNS_INFO or FCP_RSP_INFO) and
+thus does not fit into the fsp_rsp field built into a SCSI trace record,
+trace the full FCP_RSP UI with all optional parts as payload record
+instead of just FCP_SNS_INFO as payload and
+a 1 byte RSP_INFO_CODE part of FCP_RSP_INFO built into the SCSI record.
+
+That way we would also get the full FCP_SNS_INFO in case a
+target would ever send more than
+min(SCSI_SENSE_BUFFERSIZE==96, ZFCP_DBF_PAY_MAX_REC==256)==96.
+
+The mandatory part of FCP_RSP IU is only 24 bytes.
+PAYload costs at least one full PAY record of 256 bytes anyway.
+We cap to the hardware response size which is only FSF_FCP_RSP_SIZE==128.
+So we can just put the whole FCP_RSP IU with any optional parts into
+PAYload similarly as we do for SAN PAY since v4.9 commit aceeffbb59bb
+("zfcp: trace full payload of all SAN records (req,resp,iels)").
+This does not cause any additional trace records wasting memory.
+
+Decoded trace records were confusing because they showed a hard-coded
+sense data length of 96 even if the FCP_RSP_IU field FCP_SNS_LEN showed
+actually less.
+
+Since the same commit, we set pl_len for SAN traces to the full length of a
+request/response even if we cap the corresponding trace.
+In contrast, here for SCSI traces we set pl_len to the pre-computed
+length of FCP_RSP IU considering SNS_LEN or RSP_LEN if valid.
+Nonetheless we trace a hardcoded payload of length FSF_FCP_RSP_SIZE==128
+if there were optional parts.
+This makes it easier for the zfcpdbf tool to format only the relevant
+part of the long FCP_RSP UI buffer. And any trailing information is still
+available in the payload trace record just in case.
+
+Rename the payload record tag from "fcp_sns" to "fcp_riu" to make the new
+content explicit to zfcpdbf which can then pick a suitable field name such
+as "FCP rsp IU all:" instead of "Sense info :"
+Also, the same zfcpdbf can still be backwards compatible with "fcp_sns".
+
+Old example trace record before this fix, formatted with the tool zfcpdbf
+from s390-tools:
+
+Timestamp : ...
+Area : SCSI
+Subarea : 00
+Level : 3
+Exception : -
+CPU id : ..
+Caller : 0x...
+Record id : 1
+Tag : rsl_err
+Request id : 0x<request_id>
+SCSI ID : 0x...
+SCSI LUN : 0x...
+SCSI result : 0x00000002
+SCSI retries : 0x00
+SCSI allowed : 0x05
+SCSI scribble : 0x<request_id>
+SCSI opcode : 00000000 00000000 00000000 00000000
+FCP rsp inf cod: 0x00
+FCP rsp IU : 00000000 00000000 00000202 00000000
+ ^^==FCP_SNS_LEN_VALID
+ 00000020 00000000
+ ^^^^^^^^==FCP_SNS_LEN==32
+Sense len : 96 <==min(SCSI_SENSE_BUFFERSIZE,ZFCP_DBF_PAY_MAX_REC)
+Sense info : 70000600 00000018 00000000 29000000
+ 00000400 00000000 00000000 00000000
+ 00000000 00000000 00000000 00000000<==superfluous
+ 00000000 00000000 00000000 00000000<==superfluous
+ 00000000 00000000 00000000 00000000<==superfluous
+ 00000000 00000000 00000000 00000000<==superfluous
+
+New example trace records with this fix:
+
+Timestamp : ...
+Area : SCSI
+Subarea : 00
+Level : 3
+Exception : -
+CPU ID : ..
+Caller : 0x...
+Record ID : 1
+Tag : rsl_err
+Request ID : 0x<request_id>
+SCSI ID : 0x...
+SCSI LUN : 0x...
+SCSI result : 0x00000002
+SCSI retries : 0x00
+SCSI allowed : 0x03
+SCSI scribble : 0x<request_id>
+SCSI opcode : a30c0112 00000000 02000000 00000000
+FCP rsp inf cod: 0x00
+FCP rsp IU : 00000000 00000000 00000a02 00000200
+ 00000020 00000000
+FCP rsp IU len : 56
+FCP rsp IU all : 00000000 00000000 00000a02 00000200
+ ^^=FCP_RESID_UNDER|FCP_SNS_LEN_VALID
+ 00000020 00000000 70000500 00000018
+ ^^^^^^^^==FCP_SNS_LEN
+ ^^^^^^^^^^^^^^^^^
+ 00000000 240000cb 00011100 00000000
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ 00000000 00000000
+ ^^^^^^^^^^^^^^^^^==FCP_SNS_INFO
+
+Timestamp : ...
+Area : SCSI
+Subarea : 00
+Level : 1
+Exception : -
+CPU ID : ..
+Caller : 0x...
+Record ID : 1
+Tag : lr_okay
+Request ID : 0x<request_id>
+SCSI ID : 0x...
+SCSI LUN : 0x...
+SCSI result : 0x00000000
+SCSI retries : 0x00
+SCSI allowed : 0x05
+SCSI scribble : 0x<request_id>
+SCSI opcode : <CDB of unrelated SCSI command passed to eh handler>
+FCP rsp inf cod: 0x00
+FCP rsp IU : 00000000 00000000 00000100 00000000
+ 00000000 00000008
+FCP rsp IU len : 32
+FCP rsp IU all : 00000000 00000000 00000100 00000000
+ ^^==FCP_RSP_LEN_VALID
+ 00000000 00000008 00000000 00000000
+ ^^^^^^^^==FCP_RSP_LEN
+ ^^^^^^^^^^^^^^^^^==FCP_RSP_INFO
+
+Signed-off-by: Steffen Maier <maier@linux.vnet.ibm.com>
+Fixes: 250a1352b95e ("[SCSI] zfcp: Redesign of the debug tracing for SCSI records.")
+Reviewed-by: Benjamin Block <bblock@linux.vnet.ibm.com>
+Signed-off-by: Benjamin Block <bblock@linux.vnet.ibm.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/s390/scsi/zfcp_dbf.c | 21 +++++++++++++++++----
+ 1 file changed, 17 insertions(+), 4 deletions(-)
+
+--- a/drivers/s390/scsi/zfcp_dbf.c
++++ b/drivers/s390/scsi/zfcp_dbf.c
+@@ -427,19 +427,32 @@ void zfcp_dbf_scsi(char *tag, struct scs
+
+ if (fsf) {
+ rec->fsf_req_id = fsf->req_id;
++ rec->pl_len = FCP_RESP_WITH_EXT;
+ fcp_rsp = (struct fcp_resp_with_ext *)
+ &(fsf->qtcb->bottom.io.fcp_rsp);
++ /* mandatory parts of FCP_RSP IU in this SCSI record */
+ memcpy(&rec->fcp_rsp, fcp_rsp, FCP_RESP_WITH_EXT);
+ if (fcp_rsp->resp.fr_flags & FCP_RSP_LEN_VAL) {
+ fcp_rsp_info = (struct fcp_resp_rsp_info *) &fcp_rsp[1];
+ rec->fcp_rsp_info = fcp_rsp_info->rsp_code;
++ rec->pl_len += be32_to_cpu(fcp_rsp->ext.fr_rsp_len);
+ }
+ if (fcp_rsp->resp.fr_flags & FCP_SNS_LEN_VAL) {
+- rec->pl_len = min((u16)SCSI_SENSE_BUFFERSIZE,
+- (u16)ZFCP_DBF_PAY_MAX_REC);
+- zfcp_dbf_pl_write(dbf, sc->sense_buffer, rec->pl_len,
+- "fcp_sns", fsf->req_id);
++ rec->pl_len += be32_to_cpu(fcp_rsp->ext.fr_sns_len);
+ }
++ /* complete FCP_RSP IU in associated PAYload record
++ * but only if there are optional parts
++ */
++ if (fcp_rsp->resp.fr_flags != 0)
++ zfcp_dbf_pl_write(
++ dbf, fcp_rsp,
++ /* at least one full PAY record
++ * but not beyond hardware response field
++ */
++ min_t(u16, max_t(u16, rec->pl_len,
++ ZFCP_DBF_PAY_MAX_REC),
++ FSF_FCP_RSP_SIZE),
++ "fcp_riu", fsf->req_id);
+ }
+
+ debug_event(dbf->scsi, 1, rec, sizeof(*rec));
--- /dev/null
+From 71b8e45da51a7b64a23378221c0a5868bd79da4f Mon Sep 17 00:00:00 2001
+From: Steffen Maier <maier@linux.vnet.ibm.com>
+Date: Fri, 28 Jul 2017 12:30:51 +0200
+Subject: scsi: zfcp: fix queuecommand for scsi_eh commands when DIX enabled
+
+From: Steffen Maier <maier@linux.vnet.ibm.com>
+
+commit 71b8e45da51a7b64a23378221c0a5868bd79da4f upstream.
+
+Since commit db007fc5e20c ("[SCSI] Command protection operation"),
+scsi_eh_prep_cmnd() saves scmd->prot_op and temporarily resets it to
+SCSI_PROT_NORMAL.
+Other FCP LLDDs such as qla2xxx and lpfc shield their queuecommand()
+to only access any of scsi_prot_sg...() if
+(scsi_get_prot_op(cmd) != SCSI_PROT_NORMAL).
+
+Do the same thing for zfcp, which introduced DIX support with
+commit ef3eb71d8ba4 ("[SCSI] zfcp: Introduce experimental support for
+DIF/DIX").
+
+Otherwise, TUR SCSI commands as part of scsi_eh likely fail in zfcp,
+because the regular SCSI command with DIX protection data, that scsi_eh
+re-uses in scsi_send_eh_cmnd(), of course still has
+(scsi_prot_sg_count() != 0) and so zfcp sends down bogus requests to the
+FCP channel hardware.
+
+This causes scsi_eh_test_devices() to have (finish_cmds == 0)
+[not SCSI device is online or not scsi_eh_tur() failed]
+so regular SCSI commands, that caused / were affected by scsi_eh,
+are moved to work_q and scsi_eh_test_devices() itself returns false.
+In turn, it unnecessarily escalates in our case in scsi_eh_ready_devs()
+beyond host reset to finally scsi_eh_offline_sdevs()
+which sets affected SCSI devices offline with the following kernel message:
+
+"kernel: sd H:0:T:L: Device offlined - not ready after error recovery"
+
+Signed-off-by: Steffen Maier <maier@linux.vnet.ibm.com>
+Fixes: ef3eb71d8ba4 ("[SCSI] zfcp: Introduce experimental support for DIF/DIX")
+Reviewed-by: Benjamin Block <bblock@linux.vnet.ibm.com>
+Signed-off-by: Benjamin Block <bblock@linux.vnet.ibm.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/s390/scsi/zfcp_fsf.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/s390/scsi/zfcp_fsf.c
++++ b/drivers/s390/scsi/zfcp_fsf.c
+@@ -2247,7 +2247,8 @@ int zfcp_fsf_fcp_cmnd(struct scsi_cmnd *
+ fcp_cmnd = (struct fcp_cmnd *) &req->qtcb->bottom.io.fcp_cmnd;
+ zfcp_fc_scsi_to_fcp(fcp_cmnd, scsi_cmnd, 0);
+
+- if (scsi_prot_sg_count(scsi_cmnd)) {
++ if ((scsi_get_prot_op(scsi_cmnd) != SCSI_PROT_NORMAL) &&
++ scsi_prot_sg_count(scsi_cmnd)) {
+ zfcp_qdio_set_data_div(qdio, &req->qdio_req,
+ scsi_prot_sg_count(scsi_cmnd));
+ retval = zfcp_qdio_sbals_from_sg(qdio, &req->qdio_req,
--- /dev/null
+From fdb7cee3b9e3c561502e58137a837341f10cbf8b Mon Sep 17 00:00:00 2001
+From: Steffen Maier <maier@linux.vnet.ibm.com>
+Date: Fri, 28 Jul 2017 12:30:57 +0200
+Subject: scsi: zfcp: trace HBA FSF response by default on dismiss or timedout late response
+
+From: Steffen Maier <maier@linux.vnet.ibm.com>
+
+commit fdb7cee3b9e3c561502e58137a837341f10cbf8b upstream.
+
+At the default trace level, we only trace unsuccessful events including
+FSF responses.
+
+zfcp_dbf_hba_fsf_response() only used protocol status and FSF status to
+decide on an unsuccessful response. However, this is only one of multiple
+possible sources determining a failed struct zfcp_fsf_req.
+
+An FSF request can also "fail" if its response runs into an ERP timeout
+or if it gets dismissed because a higher level recovery was triggered
+[trace tags "erscf_1" or "erscf_2" in zfcp_erp_strategy_check_fsfreq()].
+FSF requests with ERP timeout are:
+FSF_QTCB_EXCHANGE_CONFIG_DATA, FSF_QTCB_EXCHANGE_PORT_DATA,
+FSF_QTCB_OPEN_PORT_WITH_DID or FSF_QTCB_CLOSE_PORT or
+FSF_QTCB_CLOSE_PHYSICAL_PORT for target ports,
+FSF_QTCB_OPEN_LUN, FSF_QTCB_CLOSE_LUN.
+One example is slow queue processing which can cause follow-on errors,
+e.g. FSF_PORT_ALREADY_OPEN after FSF_QTCB_OPEN_PORT_WITH_DID timed out.
+In order to see the root cause, we need to see late responses even if the
+channel presented them successfully with FSF_PROT_GOOD and FSF_GOOD.
+Example trace records formatted with zfcpdbf from the s390-tools package:
+
+Timestamp : ...
+Area : REC
+Subarea : 00
+Level : 1
+Exception : -
+CPU ID : ..
+Caller : ...
+Record ID : 1
+Tag : fcegpf1
+LUN : 0xffffffffffffffff
+WWPN : 0x<WWPN>
+D_ID : 0x00<D_ID>
+Adapter status : 0x5400050b
+Port status : 0x41200000
+LUN status : 0x00000000
+Ready count : 0x00000001
+Running count : 0x...
+ERP want : 0x02 ZFCP_ERP_ACTION_REOPEN_PORT
+ERP need : 0x02 ZFCP_ERP_ACTION_REOPEN_PORT
+|
+Timestamp : ... 30 seconds later
+Area : REC
+Subarea : 00
+Level : 1
+Exception : -
+CPU ID : ..
+Caller : ...
+Record ID : 2
+Tag : erscf_2
+LUN : 0xffffffffffffffff
+WWPN : 0x<WWPN>
+D_ID : 0x00<D_ID>
+Adapter status : 0x5400050b
+Port status : 0x41200000
+LUN status : 0x00000000
+Request ID : 0x<request_ID>
+ERP status : 0x10000000 ZFCP_STATUS_ERP_TIMEDOUT
+ERP step : 0x0800 ZFCP_ERP_STEP_PORT_OPENING
+ERP action : 0x02 ZFCP_ERP_ACTION_REOPEN_PORT
+ERP count : 0x00
+|
+Timestamp : ... later than previous record
+Area : HBA
+Subarea : 00
+Level : 5 > default level => 3 <= default level
+Exception : -
+CPU ID : 00
+Caller : ...
+Record ID : 1
+Tag : fs_qtcb => fs_rerr
+Request ID : 0x<request_ID>
+Request status : 0x00001010 ZFCP_STATUS_FSFREQ_DISMISSED
+ | ZFCP_STATUS_FSFREQ_CLEANUP
+FSF cmnd : 0x00000005
+FSF sequence no: 0x...
+FSF issued : ... > 30 seconds ago
+FSF stat : 0x00000000 FSF_GOOD
+FSF stat qual : 00000000 00000000 00000000 00000000
+Prot stat : 0x00000001 FSF_PROT_GOOD
+Prot stat qual : 00000000 00000000 00000000 00000000
+Port handle : 0x...
+LUN handle : 0x00000000
+QTCB log length: ...
+QTCB log info : ...
+
+In case of problems detecting that new responses are waiting on the input
+queue, we sooner or later trigger adapter recovery due to an FSF request
+timeout (trace tag "fsrth_1").
+FSF requests with FSF request timeout are:
+typically FSF_QTCB_ABORT_FCP_CMND; but theoretically also
+FSF_QTCB_EXCHANGE_CONFIG_DATA or FSF_QTCB_EXCHANGE_PORT_DATA via sysfs,
+FSF_QTCB_OPEN_PORT_WITH_DID or FSF_QTCB_CLOSE_PORT for WKA ports,
+FSF_QTCB_FCP_CMND for task management function (LUN / target reset).
+One or more pending requests can meanwhile have FSF_PROT_GOOD and FSF_GOOD
+because the channel filled in the response via DMA into the request's QTCB.
+
+In a theroretical case, inject code can create an erroneous FSF request
+on purpose. If data router is enabled, it uses deferred error reporting.
+A READ SCSI command can succeed with FSF_PROT_GOOD, FSF_GOOD, and
+SAM_STAT_GOOD. But on writing the read data to host memory via DMA,
+it can still fail, e.g. if an intentionally wrong scatter list does not
+provide enough space. Rather than getting an unsuccessful response,
+we get a QDIO activate check which in turn triggers adapter recovery.
+One or more pending requests can meanwhile have FSF_PROT_GOOD and FSF_GOOD
+because the channel filled in the response via DMA into the request's QTCB.
+Example trace records formatted with zfcpdbf from the s390-tools package:
+
+Timestamp : ...
+Area : HBA
+Subarea : 00
+Level : 6 > default level => 3 <= default level
+Exception : -
+CPU ID : ..
+Caller : ...
+Record ID : 1
+Tag : fs_norm => fs_rerr
+Request ID : 0x<request_ID2>
+Request status : 0x00001010 ZFCP_STATUS_FSFREQ_DISMISSED
+ | ZFCP_STATUS_FSFREQ_CLEANUP
+FSF cmnd : 0x00000001
+FSF sequence no: 0x...
+FSF issued : ...
+FSF stat : 0x00000000 FSF_GOOD
+FSF stat qual : 00000000 00000000 00000000 00000000
+Prot stat : 0x00000001 FSF_PROT_GOOD
+Prot stat qual : ........ ........ 00000000 00000000
+Port handle : 0x...
+LUN handle : 0x...
+|
+Timestamp : ...
+Area : SCSI
+Subarea : 00
+Level : 3
+Exception : -
+CPU ID : ..
+Caller : ...
+Record ID : 1
+Tag : rsl_err
+Request ID : 0x<request_ID2>
+SCSI ID : 0x...
+SCSI LUN : 0x...
+SCSI result : 0x000e0000 DID_TRANSPORT_DISRUPTED
+SCSI retries : 0x00
+SCSI allowed : 0x05
+SCSI scribble : 0x<request_ID2>
+SCSI opcode : 28... Read(10)
+FCP rsp inf cod: 0x00
+FCP rsp IU : 00000000 00000000 00000000 00000000
+ ^^ SAM_STAT_GOOD
+ 00000000 00000000
+
+Only with luck in both above cases, we could see a follow-on trace record
+of an unsuccesful event following a successful but late FSF response with
+FSF_PROT_GOOD and FSF_GOOD. Typically this was the case for I/O requests
+resulting in a SCSI trace record "rsl_err" with DID_TRANSPORT_DISRUPTED
+[On ZFCP_STATUS_FSFREQ_DISMISSED, zfcp_fsf_protstatus_eval() sets
+ZFCP_STATUS_FSFREQ_ERROR seen by the request handler functions as failure].
+However, the reason for this follow-on trace was invisible because the
+corresponding HBA trace record was missing at the default trace level
+(by default hidden records with tags "fs_norm", "fs_qtcb", or "fs_open").
+
+On adapter recovery, after we had shut down the QDIO queues, we perform
+unsuccessful pseudo completions with flag ZFCP_STATUS_FSFREQ_DISMISSED
+for each pending FSF request in zfcp_fsf_req_dismiss_all().
+In order to find the root cause, we need to see all pseudo responses even
+if the channel presented them successfully with FSF_PROT_GOOD and FSF_GOOD.
+
+Therefore, check zfcp_fsf_req.status for ZFCP_STATUS_FSFREQ_DISMISSED
+or ZFCP_STATUS_FSFREQ_ERROR and trace with a new tag "fs_rerr".
+
+It does not matter that there are numerous places which set
+ZFCP_STATUS_FSFREQ_ERROR after the location where we trace an FSF response
+early. These cases are based on protocol status != FSF_PROT_GOOD or
+== FSF_PROT_FSF_STATUS_PRESENTED and are thus already traced by default
+as trace tag "fs_perr" or "fs_ferr" respectively.
+
+NB: The trace record with tag "fssrh_1" for status read buffers on dismiss
+all remains. zfcp_fsf_req_complete() handles this and returns early.
+All other FSF request types are handled separately and as described above.
+
+Signed-off-by: Steffen Maier <maier@linux.vnet.ibm.com>
+Fixes: 8a36e4532ea1 ("[SCSI] zfcp: enhancement of zfcp debug features")
+Fixes: 2e261af84cdb ("[SCSI] zfcp: Only collect FSF/HBA debug data for matching trace levels")
+Reviewed-by: Benjamin Block <bblock@linux.vnet.ibm.com>
+Signed-off-by: Benjamin Block <bblock@linux.vnet.ibm.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/s390/scsi/zfcp_dbf.h | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/s390/scsi/zfcp_dbf.h
++++ b/drivers/s390/scsi/zfcp_dbf.h
+@@ -291,7 +291,11 @@ void zfcp_dbf_hba_fsf_response(struct zf
+ {
+ struct fsf_qtcb *qtcb = req->qtcb;
+
+- if ((qtcb->prefix.prot_status != FSF_PROT_GOOD) &&
++ if (unlikely(req->status & (ZFCP_STATUS_FSFREQ_DISMISSED |
++ ZFCP_STATUS_FSFREQ_ERROR))) {
++ zfcp_dbf_hba_fsf_resp("fs_rerr", 3, req);
++
++ } else if ((qtcb->prefix.prot_status != FSF_PROT_GOOD) &&
+ (qtcb->prefix.prot_status != FSF_PROT_FSF_STATUS_PRESENTED)) {
+ zfcp_dbf_hba_fsf_resp("fs_perr", 1, req);
+
--- /dev/null
+From 5d4a3d0a2ff23799b956e5962b886287614e7fad Mon Sep 17 00:00:00 2001
+From: Steffen Maier <maier@linux.vnet.ibm.com>
+Date: Fri, 28 Jul 2017 12:30:58 +0200
+Subject: scsi: zfcp: trace high part of "new" 64 bit SCSI LUN
+
+From: Steffen Maier <maier@linux.vnet.ibm.com>
+
+commit 5d4a3d0a2ff23799b956e5962b886287614e7fad upstream.
+
+Complements debugging aspects of the otherwise functionally complete
+v3.17 commit 9cb78c16f5da ("scsi: use 64-bit LUNs").
+
+While I don't have access to a target exporting 3 or 4 level LUNs,
+I did test it by explicitly attaching a non-existent fake 4 level LUN
+by means of zfcp sysfs attribute "unit_add".
+In order to see corresponding trace records of otherwise successful
+events, we had to increase the trace level of area SCSI and HBA to 6.
+
+$ echo 6 > /sys/kernel/debug/s390dbf/zfcp_0.0.1880_scsi/level
+$ echo 6 > /sys/kernel/debug/s390dbf/zfcp_0.0.1880_hba/level
+
+$ echo 0x4011402240334044 > \
+ /sys/bus/ccw/drivers/zfcp/0.0.1880/0x50050763031bd327/unit_add
+
+Example output formatted by an updated zfcpdbf from the s390-tools
+package interspersed with kernel messages at scsi_logging_level=4605:
+
+Timestamp : ...
+Area : REC
+Subarea : 00
+Level : 1
+Exception : -
+CPU ID : ..
+Caller : 0x...
+Record ID : 1
+Tag : scsla_1
+LUN : 0x4011402240334044
+WWPN : 0x50050763031bd327
+D_ID : 0x00......
+Adapter status : 0x5400050b
+Port status : 0x54000001
+LUN status : 0x41000000
+Ready count : 0x00000001
+Running count : 0x00000000
+ERP want : 0x01
+ERP need : 0x01
+
+scsi 2:0:0:4630896905707208721: scsi scan: INQUIRY pass 1 length 36
+scsi 2:0:0:4630896905707208721: scsi scan: INQUIRY successful with code 0x0
+
+Timestamp : ...
+Area : HBA
+Subarea : 00
+Level : 6
+Exception : -
+CPU ID : ..
+Caller : 0x...
+Record ID : 1
+Tag : fs_norm
+Request ID : 0x<inquiry2-req-id>
+Request status : 0x00000010
+FSF cmnd : 0x00000001
+FSF sequence no: 0x...
+FSF issued : ...
+FSF stat : 0x00000000
+FSF stat qual : 00000000 00000000 00000000 00000000
+Prot stat : 0x00000001
+Prot stat qual : ........ ........ 00000000 00000000
+Port handle : 0x...
+LUN handle : 0x...
+|
+Timestamp : ...
+Area : SCSI
+Subarea : 00
+Level : 6
+Exception : -
+CPU ID : ..
+Caller : 0x...
+Record ID : 1
+Tag : rsl_nor
+Request ID : 0x<inquiry2-req-id>
+SCSI ID : 0x00000000
+SCSI LUN : 0x40224011
+SCSI LUN high : 0x40444033 <=======================
+SCSI result : 0x00000000
+SCSI retries : 0x00
+SCSI allowed : 0x03
+SCSI scribble : 0x<inquiry2-req-id>
+SCSI opcode : 12000000 a4000000 00000000 00000000
+FCP rsp inf cod: 0x00
+FCP rsp IU : 00000000 00000000 00000000 00000000
+ 00000000 00000000
+
+scsi 2:0:0:4630896905707208721: scsi scan: INQUIRY pass 2 length 164
+scsi 2:0:0:4630896905707208721: scsi scan: INQUIRY successful with code 0x0
+scsi 2:0:0:4630896905707208721: scsi scan: peripheral device type of 31, \
+no device added
+
+Signed-off-by: Steffen Maier <maier@linux.vnet.ibm.com>
+Fixes: 9cb78c16f5da ("scsi: use 64-bit LUNs")
+Reviewed-by: Benjamin Block <bblock@linux.vnet.ibm.com>
+Reviewed-by: Jens Remus <jremus@linux.vnet.ibm.com>
+Signed-off-by: Benjamin Block <bblock@linux.vnet.ibm.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/s390/scsi/zfcp_dbf.c | 2 +-
+ drivers/s390/scsi/zfcp_dbf.h | 4 +++-
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/s390/scsi/zfcp_dbf.c
++++ b/drivers/s390/scsi/zfcp_dbf.c
+@@ -418,8 +418,8 @@ void zfcp_dbf_scsi(char *tag, struct scs
+ rec->scsi_retries = sc->retries;
+ rec->scsi_allowed = sc->allowed;
+ rec->scsi_id = sc->device->id;
+- /* struct zfcp_dbf_scsi needs to be updated to handle 64bit LUNs */
+ rec->scsi_lun = (u32)sc->device->lun;
++ rec->scsi_lun_64_hi = (u32)(sc->device->lun >> 32);
+ rec->host_scribble = (unsigned long)sc->host_scribble;
+
+ memcpy(rec->scsi_opcode, sc->cmnd,
+--- a/drivers/s390/scsi/zfcp_dbf.h
++++ b/drivers/s390/scsi/zfcp_dbf.h
+@@ -196,7 +196,7 @@ enum zfcp_dbf_scsi_id {
+ * @id: unique number of recovery record type
+ * @tag: identifier string specifying the location of initiation
+ * @scsi_id: scsi device id
+- * @scsi_lun: scsi device logical unit number
++ * @scsi_lun: scsi device logical unit number, low part of 64 bit, old 32 bit
+ * @scsi_result: scsi result
+ * @scsi_retries: current retry number of scsi request
+ * @scsi_allowed: allowed retries
+@@ -206,6 +206,7 @@ enum zfcp_dbf_scsi_id {
+ * @host_scribble: LLD specific data attached to SCSI request
+ * @pl_len: length of paload stored as zfcp_dbf_pay
+ * @fsf_rsp: response for fsf request
++ * @scsi_lun_64_hi: scsi device logical unit number, high part of 64 bit
+ */
+ struct zfcp_dbf_scsi {
+ u8 id;
+@@ -222,6 +223,7 @@ struct zfcp_dbf_scsi {
+ u64 host_scribble;
+ u16 pl_len;
+ struct fcp_resp_with_ext fcp_rsp;
++ u32 scsi_lun_64_hi;
+ } __packed;
+
+ /**
powerpc-fix-dar-reporting-when-alignment-handler-faults.patch
block-relax-a-check-in-blk_start_queue.patch
md-bitmap-disable-bitmap_resize-for-file-backed-bitmaps.patch
+skd-avoid-that-module-unloading-triggers-a-use-after-free.patch
+skd-submit-requests-to-firmware-before-triggering-the-doorbell.patch
+scsi-zfcp-fix-queuecommand-for-scsi_eh-commands-when-dix-enabled.patch
+scsi-zfcp-add-handling-for-fcp_resid_over-to-the-fcp-ingress-path.patch
+scsi-zfcp-fix-missing-trace-records-for-early-returns-in-tmf-eh-handlers.patch
+scsi-zfcp-fix-payload-with-full-fcp_rsp-iu-in-scsi-trace-records.patch
+scsi-zfcp-trace-hba-fsf-response-by-default-on-dismiss-or-timedout-late-response.patch
+scsi-zfcp-trace-high-part-of-new-64-bit-scsi-lun.patch
--- /dev/null
+From 7277cc67b3916eed47558c64f9c9c0de00a35cda Mon Sep 17 00:00:00 2001
+From: Bart Van Assche <bart.vanassche@wdc.com>
+Date: Thu, 17 Aug 2017 13:12:45 -0700
+Subject: skd: Avoid that module unloading triggers a use-after-free
+
+From: Bart Van Assche <bart.vanassche@wdc.com>
+
+commit 7277cc67b3916eed47558c64f9c9c0de00a35cda upstream.
+
+Since put_disk() triggers a disk_release() call and since that
+last function calls blk_put_queue() if disk->queue != NULL, clear
+the disk->queue pointer before calling put_disk(). This avoids
+that unloading the skd kernel module triggers the following
+use-after-free:
+
+WARNING: CPU: 8 PID: 297 at lib/refcount.c:128 refcount_sub_and_test+0x70/0x80
+refcount_t: underflow; use-after-free.
+CPU: 8 PID: 297 Comm: kworker/8:1 Not tainted 4.11.10-300.fc26.x86_64 #1
+Workqueue: events work_for_cpu_fn
+Call Trace:
+ dump_stack+0x63/0x84
+ __warn+0xcb/0xf0
+ warn_slowpath_fmt+0x5a/0x80
+ refcount_sub_and_test+0x70/0x80
+ refcount_dec_and_test+0x11/0x20
+ kobject_put+0x1f/0x50
+ blk_put_queue+0x15/0x20
+ disk_release+0xae/0xf0
+ device_release+0x32/0x90
+ kobject_release+0x67/0x170
+ kobject_put+0x2b/0x50
+ put_disk+0x17/0x20
+ skd_destruct+0x5c/0x890 [skd]
+ skd_pci_probe+0x124d/0x13a0 [skd]
+ local_pci_probe+0x42/0xa0
+ work_for_cpu_fn+0x14/0x20
+ process_one_work+0x19e/0x470
+ worker_thread+0x1dc/0x4a0
+ kthread+0x125/0x140
+ ret_from_fork+0x25/0x30
+
+Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Hannes Reinecke <hare@suse.de>
+Cc: Johannes Thumshirn <jthumshirn@suse.de>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/skd_main.c | 15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+--- a/drivers/block/skd_main.c
++++ b/drivers/block/skd_main.c
+@@ -4679,15 +4679,16 @@ static void skd_free_disk(struct skd_dev
+ {
+ struct gendisk *disk = skdev->disk;
+
+- if (disk != NULL) {
+- struct request_queue *q = disk->queue;
++ if (disk && (disk->flags & GENHD_FL_UP))
++ del_gendisk(disk);
+
+- if (disk->flags & GENHD_FL_UP)
+- del_gendisk(disk);
+- if (q)
+- blk_cleanup_queue(q);
+- put_disk(disk);
++ if (skdev->queue) {
++ blk_cleanup_queue(skdev->queue);
++ skdev->queue = NULL;
++ disk->queue = NULL;
+ }
++
++ put_disk(disk);
+ skdev->disk = NULL;
+ }
+
--- /dev/null
+From 5fbd545cd3fd311ea1d6e8be4cedddd0ee5684c7 Mon Sep 17 00:00:00 2001
+From: Bart Van Assche <bart.vanassche@wdc.com>
+Date: Thu, 17 Aug 2017 13:12:46 -0700
+Subject: skd: Submit requests to firmware before triggering the doorbell
+
+From: Bart Van Assche <bart.vanassche@wdc.com>
+
+commit 5fbd545cd3fd311ea1d6e8be4cedddd0ee5684c7 upstream.
+
+Ensure that the members of struct skd_msg_buf have been transferred
+to the PCIe adapter before the doorbell is triggered. This patch
+avoids that I/O fails sporadically and that the following error
+message is reported:
+
+(skd0:STM000196603:[0000:00:09.0]): Completion mismatch comp_id=0x0000 skreq=0x0400 new=0x0000
+
+Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
+Cc: Christoph Hellwig <hch@lst.de>
+Cc: Hannes Reinecke <hare@suse.de>
+Cc: Johannes Thumshirn <jthumshirn@suse.de>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/block/skd_main.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/block/skd_main.c
++++ b/drivers/block/skd_main.c
+@@ -2214,6 +2214,9 @@ static void skd_send_fitmsg(struct skd_d
+ */
+ qcmd |= FIT_QCMD_MSGSIZE_64;
+
++ /* Make sure skd_msg_buf is written before the doorbell is triggered. */
++ smp_wmb();
++
+ SKD_WRITEQ(skdev, qcmd, FIT_Q_COMMAND);
+
+ }
+@@ -2260,6 +2263,9 @@ static void skd_send_special_fitmsg(stru
+ qcmd = skspcl->mb_dma_address;
+ qcmd |= FIT_QCMD_QID_NORMAL + FIT_QCMD_MSGSIZE_128;
+
++ /* Make sure skd_msg_buf is written before the doorbell is triggered. */
++ smp_wmb();
++
+ SKD_WRITEQ(skdev, qcmd, FIT_Q_COMMAND);
+ }
+