From: Greg Kroah-Hartman Date: Fri, 22 Sep 2017 10:14:30 +0000 (+0200) Subject: 4.13-stable patches X-Git-Tag: v3.18.72~19 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=c843463f4a5d55fc3e30846da4bd5fd61be7e21c;p=thirdparty%2Fkernel%2Fstable-queue.git 4.13-stable patches added patches: scsi-aacraid-fix-command-send-race-condition.patch scsi-megaraid_sas-check-valid-aen-class-range-to-avoid-kernel-panic.patch scsi-megaraid_sas-mismatch-of-allocated-mfi-frame-size-and-length-exposed-in-mfi-mpt-pass-through-command.patch scsi-megaraid_sas-return-pended-ioctls-with-cmd_status-mfi_stat_wrong_state-in-case-adapter-is-dead.patch scsi-megaraid_sas-set-minimum-value-of-resetwaittime-to-be-1-secs.patch scsi-qedi-off-by-one-in-qedi_get_cmd_from_tid.patch scsi-sg-factor-out-sg_fill_request_table.patch scsi-storvsc-fix-memory-leak-on-ring-buffer-busy.patch scsi-zfcp-add-handling-for-fcp_resid_over-to-the-fcp-ingress-path.patch scsi-zfcp-fix-capping-of-unsuccessful-gpn_ft-san-response-trace-records.patch scsi-zfcp-fix-missing-trace-records-for-early-returns-in-tmf-eh-handlers.patch scsi-zfcp-fix-passing-fsf_req-to-scsi-trace-on-tmf-to-correlate-with-hba.patch scsi-zfcp-fix-payload-with-full-fcp_rsp-iu-in-scsi-trace-records.patch scsi-zfcp-fix-queuecommand-for-scsi_eh-commands-when-dix-enabled.patch scsi-zfcp-trace-hba-fsf-response-by-default-on-dismiss-or-timedout-late-response.patch scsi-zfcp-trace-high-part-of-new-64-bit-scsi-lun.patch skd-avoid-that-module-unloading-triggers-a-use-after-free.patch skd-submit-requests-to-firmware-before-triggering-the-doorbell.patch --- diff --git a/queue-4.13/iwlwifi-add-workaround-to-disable-wide-channels-in-5ghz.patch b/queue-4.13/iwlwifi-add-workaround-to-disable-wide-channels-in-5ghz.patch deleted file mode 100644 index c12b26cee69..00000000000 --- a/queue-4.13/iwlwifi-add-workaround-to-disable-wide-channels-in-5ghz.patch +++ /dev/null @@ -1,189 +0,0 @@ -From 01a9c948a09348950515bf2abb6113ed83e696d8 Mon Sep 17 00:00:00 
2001 -From: Luca Coelho -Date: Tue, 15 Aug 2017 20:48:41 +0300 -Subject: iwlwifi: add workaround to disable wide channels in 5GHz - -From: Luca Coelho - -commit 01a9c948a09348950515bf2abb6113ed83e696d8 upstream. - -The OTP in some SKUs have erroneously allowed 40MHz and 80MHz channels -in the 5.2GHz band. The firmware has been modified to not allow this -in those SKUs, so the driver needs to do the same otherwise the -firmware will assert when we try to use it. - -Signed-off-by: Luca Coelho -Signed-off-by: Greg Kroah-Hartman - -diff --git a/drivers/net/wireless/intel/iwlwifi/fw/nvm.c b/drivers/net/wireless/intel/iwlwifi/fw/nvm.c -index ae03d0f5564f..e81f6dd3744e 100644 ---- a/drivers/net/wireless/intel/iwlwifi/fw/nvm.c -+++ b/drivers/net/wireless/intel/iwlwifi/fw/nvm.c -@@ -148,7 +148,8 @@ struct iwl_nvm_data *iwl_fw_get_nvm(struct iwl_fw_runtime *fwrt) - rsp->regulatory.channel_profile, - nvm->valid_tx_ant & fwrt->fw->valid_tx_ant, - nvm->valid_rx_ant & fwrt->fw->valid_rx_ant, -- rsp->regulatory.lar_enabled && lar_fw_supported); -+ rsp->regulatory.lar_enabled && lar_fw_supported, -+ false); - - iwl_free_resp(&hcmd); - return nvm; -diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c -index 1172e4572a82..ea165b3e6dd3 100644 ---- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c -+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c -@@ -79,6 +79,7 @@ - /* NVM offsets (in words) definitions */ - enum wkp_nvm_offsets { - /* NVM HW-Section offset (in words) definitions */ -+ SUBSYSTEM_ID = 0x0A, - HW_ADDR = 0x15, - - /* NVM SW-Section offset (in words) definitions */ -@@ -258,13 +259,12 @@ static u32 iwl_get_channel_flags(u8 ch_num, int ch_idx, bool is_5ghz, - static int iwl_init_channel_map(struct device *dev, const struct iwl_cfg *cfg, - struct iwl_nvm_data *data, - const __le16 * const nvm_ch_flags, -- bool lar_supported) -+ bool lar_supported, bool no_wide_in_5ghz) - { - int ch_idx; - int 
n_channels = 0; - struct ieee80211_channel *channel; - u16 ch_flags; -- bool is_5ghz; - int num_of_ch, num_2ghz_channels; - const u8 *nvm_chan; - -@@ -279,12 +279,20 @@ static int iwl_init_channel_map(struct device *dev, const struct iwl_cfg *cfg, - } - - for (ch_idx = 0; ch_idx < num_of_ch; ch_idx++) { -+ bool is_5ghz = (ch_idx >= num_2ghz_channels); -+ - ch_flags = __le16_to_cpup(nvm_ch_flags + ch_idx); - -- if (ch_idx >= num_2ghz_channels && -- !data->sku_cap_band_52GHz_enable) -+ if (is_5ghz && !data->sku_cap_band_52GHz_enable) - continue; - -+ /* workaround to disable wide channels in 5GHz */ -+ if (no_wide_in_5ghz && is_5ghz) { -+ ch_flags &= ~(NVM_CHANNEL_40MHZ | -+ NVM_CHANNEL_80MHZ | -+ NVM_CHANNEL_160MHZ); -+ } -+ - if (ch_flags & NVM_CHANNEL_160MHZ) - data->vht160_supported = true; - -@@ -307,8 +315,8 @@ static int iwl_init_channel_map(struct device *dev, const struct iwl_cfg *cfg, - n_channels++; - - channel->hw_value = nvm_chan[ch_idx]; -- channel->band = (ch_idx < num_2ghz_channels) ? -- NL80211_BAND_2GHZ : NL80211_BAND_5GHZ; -+ channel->band = is_5ghz ? 
-+ NL80211_BAND_5GHZ : NL80211_BAND_2GHZ; - channel->center_freq = - ieee80211_channel_to_frequency( - channel->hw_value, channel->band); -@@ -320,7 +328,6 @@ static int iwl_init_channel_map(struct device *dev, const struct iwl_cfg *cfg, - * is not used in mvm, and is used for backwards compatibility - */ - channel->max_power = IWL_DEFAULT_MAX_TX_POWER; -- is_5ghz = channel->band == NL80211_BAND_5GHZ; - - /* don't put limitations in case we're using LAR */ - if (!lar_supported) -@@ -438,14 +445,15 @@ static void iwl_init_vht_hw_capab(const struct iwl_cfg *cfg, - - void iwl_init_sbands(struct device *dev, const struct iwl_cfg *cfg, - struct iwl_nvm_data *data, const __le16 *nvm_ch_flags, -- u8 tx_chains, u8 rx_chains, bool lar_supported) -+ u8 tx_chains, u8 rx_chains, bool lar_supported, -+ bool no_wide_in_5ghz) - { - int n_channels; - int n_used = 0; - struct ieee80211_supported_band *sband; - - n_channels = iwl_init_channel_map(dev, cfg, data, nvm_ch_flags, -- lar_supported); -+ lar_supported, no_wide_in_5ghz); - sband = &data->bands[NL80211_BAND_2GHZ]; - sband->band = NL80211_BAND_2GHZ; - sband->bitrates = &iwl_cfg80211_rates[RATES_24_OFFS]; -@@ -651,6 +659,39 @@ static int iwl_set_hw_address(struct iwl_trans *trans, - return 0; - } - -+static bool -+iwl_nvm_no_wide_in_5ghz(struct device *dev, const struct iwl_cfg *cfg, -+ const __le16 *nvm_hw) -+{ -+ /* -+ * Workaround a bug in Indonesia SKUs where the regulatory in -+ * some 7000-family OTPs erroneously allow wide channels in -+ * 5GHz. To check for Indonesia, we take the SKU value from -+ * bits 1-4 in the subsystem ID and check if it is either 5 or -+ * 9. In those cases, we need to force-disable wide channels -+ * in 5GHz otherwise the FW will throw a sysassert when we try -+ * to use them. -+ */ -+ if (cfg->device_family == IWL_DEVICE_FAMILY_7000) { -+ /* -+ * Unlike the other sections in the NVM, the hw -+ * section uses big-endian. 
-+ */ -+ u16 subsystem_id = be16_to_cpup((const __be16 *)nvm_hw -+ + SUBSYSTEM_ID); -+ u8 sku = (subsystem_id & 0x1e) >> 1; -+ -+ if (sku == 5 || sku == 9) { -+ IWL_DEBUG_EEPROM(dev, -+ "disabling wide channels in 5GHz (0x%0x %d)\n", -+ subsystem_id, sku); -+ return true; -+ } -+ } -+ -+ return false; -+} -+ - struct iwl_nvm_data * - iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg, - const __le16 *nvm_hw, const __le16 *nvm_sw, -@@ -661,6 +702,7 @@ iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg, - struct device *dev = trans->dev; - struct iwl_nvm_data *data; - bool lar_enabled; -+ bool no_wide_in_5ghz = iwl_nvm_no_wide_in_5ghz(dev, cfg, nvm_hw); - u32 sku, radio_cfg; - u16 lar_config; - const __le16 *ch_section; -@@ -731,7 +773,7 @@ iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg, - } - - iwl_init_sbands(dev, cfg, data, ch_section, tx_chains, rx_chains, -- lar_fw_supported && lar_enabled); -+ lar_fw_supported && lar_enabled, no_wide_in_5ghz); - data->calib_version = 255; - - return data; -diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h -index 3fd6506a02ab..50d9b3eaa4f8 100644 ---- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h -+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h -@@ -93,7 +93,8 @@ void iwl_set_hw_address_from_csr(struct iwl_trans *trans, - */ - void iwl_init_sbands(struct device *dev, const struct iwl_cfg *cfg, - struct iwl_nvm_data *data, const __le16 *nvm_ch_flags, -- u8 tx_chains, u8 rx_chains, bool lar_supported); -+ u8 tx_chains, u8 rx_chains, bool lar_supported, -+ bool no_wide_in_5ghz); - - /** - * iwl_parse_mcc_info - parse MCC (mobile country code) info coming from FW diff --git a/queue-4.13/scsi-aacraid-fix-command-send-race-condition.patch b/queue-4.13/scsi-aacraid-fix-command-send-race-condition.patch new file mode 100644 index 00000000000..98486e0b3e0 --- /dev/null +++ 
b/queue-4.13/scsi-aacraid-fix-command-send-race-condition.patch @@ -0,0 +1,235 @@ +From 1ae948fa4f00f3a2823e7cb19a3049ef27dd6947 Mon Sep 17 00:00:00 2001 +From: Brian King +Date: Tue, 29 Aug 2017 10:00:29 -0500 +Subject: scsi: aacraid: Fix command send race condition + +From: Brian King + +commit 1ae948fa4f00f3a2823e7cb19a3049ef27dd6947 upstream. + +This fixes a potential race condition observed on Power systems. + +Several places throughout the aacraid driver call aac_fib_send or +similar to send a command to the aacraid adapter, then check the return +code to determine if the command was actually sent to the adapter, then +update the phase field in the scsi command scratch pad area to track +that the firmware now owns this command. However, there is nothing that +ensures that by the time the aac_fib_send function returns and we go to +write to the scsi command, that the command hasn't already completed and +the scsi command has been freed. This was causing random crashes in the +TCP stack which was tracked down to be caused by memory that had been a +struct request + scsi_cmnd being now used for an skbuff. Memory +poisoning was enabled in the kernel to debug this which showed that the +last owner of the memory that had been freed was aacraid and that it was +a struct request. The memory that was corrupted was the exact data +pattern of AAC_OWNER_FIRMWARE and it was at the same offset that aacraid +writes, which is scsicmd->SCp.phase. The patch below resolves this +issue. + +Signed-off-by: Brian King +Tested-by: Wen Xiong +Reviewed-by: Dave Carroll +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/aacraid/aachba.c | 54 ++++++++++++++++-------------------------- + 1 file changed, 21 insertions(+), 33 deletions(-) + +--- a/drivers/scsi/aacraid/aachba.c ++++ b/drivers/scsi/aacraid/aachba.c +@@ -594,6 +594,7 @@ static int aac_get_container_name(struct + + aac_fib_init(cmd_fibcontext); + dinfo = (struct aac_get_name *) fib_data(cmd_fibcontext); ++ scsicmd->SCp.phase = AAC_OWNER_FIRMWARE; + + dinfo->command = cpu_to_le32(VM_ContainerConfig); + dinfo->type = cpu_to_le32(CT_READ_NAME); +@@ -611,10 +612,8 @@ static int aac_get_container_name(struct + /* + * Check that the command queued to the controller + */ +- if (status == -EINPROGRESS) { +- scsicmd->SCp.phase = AAC_OWNER_FIRMWARE; ++ if (status == -EINPROGRESS) + return 0; +- } + + printk(KERN_WARNING "aac_get_container_name: aac_fib_send failed with status: %d.\n", status); + aac_fib_complete(cmd_fibcontext); +@@ -725,6 +724,7 @@ static void _aac_probe_container1(void * + + dinfo->count = cpu_to_le32(scmd_id(scsicmd)); + dinfo->type = cpu_to_le32(FT_FILESYS); ++ scsicmd->SCp.phase = AAC_OWNER_FIRMWARE; + + status = aac_fib_send(ContainerCommand, + fibptr, +@@ -736,9 +736,7 @@ static void _aac_probe_container1(void * + /* + * Check that the command queued to the controller + */ +- if (status == -EINPROGRESS) +- scsicmd->SCp.phase = AAC_OWNER_FIRMWARE; +- else if (status < 0) { ++ if (status < 0 && status != -EINPROGRESS) { + /* Inherit results from VM_NameServe, if any */ + dresp->status = cpu_to_le32(ST_OK); + _aac_probe_container2(context, fibptr); +@@ -766,6 +764,7 @@ static int _aac_probe_container(struct s + dinfo->count = cpu_to_le32(scmd_id(scsicmd)); + dinfo->type = cpu_to_le32(FT_FILESYS); + scsicmd->SCp.ptr = (char *)callback; ++ scsicmd->SCp.phase = AAC_OWNER_FIRMWARE; + + status = aac_fib_send(ContainerCommand, + fibptr, +@@ -777,10 +776,9 @@ static int _aac_probe_container(struct s + /* + * Check that the command queued to the 
controller + */ +- if (status == -EINPROGRESS) { +- scsicmd->SCp.phase = AAC_OWNER_FIRMWARE; ++ if (status == -EINPROGRESS) + return 0; +- } ++ + if (status < 0) { + scsicmd->SCp.ptr = NULL; + aac_fib_complete(fibptr); +@@ -1126,6 +1124,7 @@ static int aac_get_container_serial(stru + dinfo->command = cpu_to_le32(VM_ContainerConfig); + dinfo->type = cpu_to_le32(CT_CID_TO_32BITS_UID); + dinfo->cid = cpu_to_le32(scmd_id(scsicmd)); ++ scsicmd->SCp.phase = AAC_OWNER_FIRMWARE; + + status = aac_fib_send(ContainerCommand, + cmd_fibcontext, +@@ -1138,10 +1137,8 @@ static int aac_get_container_serial(stru + /* + * Check that the command queued to the controller + */ +- if (status == -EINPROGRESS) { +- scsicmd->SCp.phase = AAC_OWNER_FIRMWARE; ++ if (status == -EINPROGRESS) + return 0; +- } + + printk(KERN_WARNING "aac_get_container_serial: aac_fib_send failed with status: %d.\n", status); + aac_fib_complete(cmd_fibcontext); +@@ -2335,16 +2332,14 @@ static int aac_read(struct scsi_cmnd * s + * Alocate and initialize a Fib + */ + cmd_fibcontext = aac_fib_alloc_tag(dev, scsicmd); +- ++ scsicmd->SCp.phase = AAC_OWNER_FIRMWARE; + status = aac_adapter_read(cmd_fibcontext, scsicmd, lba, count); + + /* + * Check that the command queued to the controller + */ +- if (status == -EINPROGRESS) { +- scsicmd->SCp.phase = AAC_OWNER_FIRMWARE; ++ if (status == -EINPROGRESS) + return 0; +- } + + printk(KERN_WARNING "aac_read: aac_fib_send failed with status: %d.\n", status); + /* +@@ -2429,16 +2424,14 @@ static int aac_write(struct scsi_cmnd * + * Allocate and initialize a Fib then setup a BlockWrite command + */ + cmd_fibcontext = aac_fib_alloc_tag(dev, scsicmd); +- ++ scsicmd->SCp.phase = AAC_OWNER_FIRMWARE; + status = aac_adapter_write(cmd_fibcontext, scsicmd, lba, count, fua); + + /* + * Check that the command queued to the controller + */ +- if (status == -EINPROGRESS) { +- scsicmd->SCp.phase = AAC_OWNER_FIRMWARE; ++ if (status == -EINPROGRESS) + return 0; +- } + + printk(KERN_WARNING 
"aac_write: aac_fib_send failed with status: %d\n", status); + /* +@@ -2588,6 +2581,7 @@ static int aac_synchronize(struct scsi_c + synchronizecmd->cid = cpu_to_le32(scmd_id(scsicmd)); + synchronizecmd->count = + cpu_to_le32(sizeof(((struct aac_synchronize_reply *)NULL)->data)); ++ scsicmd->SCp.phase = AAC_OWNER_FIRMWARE; + + /* + * Now send the Fib to the adapter +@@ -2603,10 +2597,8 @@ static int aac_synchronize(struct scsi_c + /* + * Check that the command queued to the controller + */ +- if (status == -EINPROGRESS) { +- scsicmd->SCp.phase = AAC_OWNER_FIRMWARE; ++ if (status == -EINPROGRESS) + return 0; +- } + + printk(KERN_WARNING + "aac_synchronize: aac_fib_send failed with status: %d.\n", status); +@@ -2666,6 +2658,7 @@ static int aac_start_stop(struct scsi_cm + pmcmd->cid = cpu_to_le32(sdev_id(sdev)); + pmcmd->parm = (scsicmd->cmnd[1] & 1) ? + cpu_to_le32(CT_PM_UNIT_IMMEDIATE) : 0; ++ scsicmd->SCp.phase = AAC_OWNER_FIRMWARE; + + /* + * Now send the Fib to the adapter +@@ -2681,10 +2674,8 @@ static int aac_start_stop(struct scsi_cm + /* + * Check that the command queued to the controller + */ +- if (status == -EINPROGRESS) { +- scsicmd->SCp.phase = AAC_OWNER_FIRMWARE; ++ if (status == -EINPROGRESS) + return 0; +- } + + aac_fib_complete(cmd_fibcontext); + aac_fib_free(cmd_fibcontext); +@@ -3692,16 +3683,14 @@ static int aac_send_srb_fib(struct scsi_ + * Allocate and initialize a Fib then setup a BlockWrite command + */ + cmd_fibcontext = aac_fib_alloc_tag(dev, scsicmd); +- ++ scsicmd->SCp.phase = AAC_OWNER_FIRMWARE; + status = aac_adapter_scsi(cmd_fibcontext, scsicmd); + + /* + * Check that the command queued to the controller + */ +- if (status == -EINPROGRESS) { +- scsicmd->SCp.phase = AAC_OWNER_FIRMWARE; ++ if (status == -EINPROGRESS) + return 0; +- } + + printk(KERN_WARNING "aac_srb: aac_fib_send failed with status: %d\n", status); + aac_fib_complete(cmd_fibcontext); +@@ -3739,15 +3728,14 @@ static int aac_send_hba_fib(struct scsi_ + if (!cmd_fibcontext) + 
return -1; + ++ scsicmd->SCp.phase = AAC_OWNER_FIRMWARE; + status = aac_adapter_hba(cmd_fibcontext, scsicmd); + + /* + * Check that the command queued to the controller + */ +- if (status == -EINPROGRESS) { +- scsicmd->SCp.phase = AAC_OWNER_FIRMWARE; ++ if (status == -EINPROGRESS) + return 0; +- } + + pr_warn("aac_hba_cmd_req: aac_fib_send failed with status: %d\n", + status); diff --git a/queue-4.13/scsi-megaraid_sas-check-valid-aen-class-range-to-avoid-kernel-panic.patch b/queue-4.13/scsi-megaraid_sas-check-valid-aen-class-range-to-avoid-kernel-panic.patch new file mode 100644 index 00000000000..7cfa8507fbe --- /dev/null +++ b/queue-4.13/scsi-megaraid_sas-check-valid-aen-class-range-to-avoid-kernel-panic.patch @@ -0,0 +1,37 @@ +From 91b3d9f0069c8307d0b3a4c6843b65a439183318 Mon Sep 17 00:00:00 2001 +From: Shivasharan S +Date: Wed, 23 Aug 2017 04:47:01 -0700 +Subject: scsi: megaraid_sas: Check valid aen class range to avoid kernel panic + +From: Shivasharan S + +commit 91b3d9f0069c8307d0b3a4c6843b65a439183318 upstream. + +Signed-off-by: Kashyap Desai +Signed-off-by: Shivasharan S +Reviewed-by: Hannes Reinecke +Reviewed-by: Tomas Henzl +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/megaraid/megaraid_sas_base.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/drivers/scsi/megaraid/megaraid_sas_base.c ++++ b/drivers/scsi/megaraid/megaraid_sas_base.c +@@ -5650,6 +5650,14 @@ megasas_register_aen(struct megasas_inst + prev_aen.word = + le32_to_cpu(instance->aen_cmd->frame->dcmd.mbox.w[1]); + ++ if ((curr_aen.members.class < MFI_EVT_CLASS_DEBUG) || ++ (curr_aen.members.class > MFI_EVT_CLASS_DEAD)) { ++ dev_info(&instance->pdev->dev, ++ "%s %d out of range class %d send by application\n", ++ __func__, __LINE__, curr_aen.members.class); ++ return 0; ++ } ++ + /* + * A class whose enum value is smaller is inclusive of all + * higher values. 
If a PROGRESS (= -1) was previously diff --git a/queue-4.13/scsi-megaraid_sas-mismatch-of-allocated-mfi-frame-size-and-length-exposed-in-mfi-mpt-pass-through-command.patch b/queue-4.13/scsi-megaraid_sas-mismatch-of-allocated-mfi-frame-size-and-length-exposed-in-mfi-mpt-pass-through-command.patch new file mode 100644 index 00000000000..6ba5c678d3d --- /dev/null +++ b/queue-4.13/scsi-megaraid_sas-mismatch-of-allocated-mfi-frame-size-and-length-exposed-in-mfi-mpt-pass-through-command.patch @@ -0,0 +1,35 @@ +From ed2983f458bed9dc827ec60c8486253b1669bb52 Mon Sep 17 00:00:00 2001 +From: Shivasharan S +Date: Wed, 23 Aug 2017 04:46:55 -0700 +Subject: scsi: megaraid_sas: mismatch of allocated MFI frame size and length exposed in MFI MPT pass through command + +From: Shivasharan S + +commit ed2983f458bed9dc827ec60c8486253b1669bb52 upstream. + +The driver allocates 256-byte MFI frames, but while sending an MFI frame +(embedded inside a chain frame of the MPT frame) to firmware, it sets the +length to 4k. This results in DMA read error messages during boot. + +Signed-off-by: Kashyap Desai +Signed-off-by: Shivasharan S +Reviewed-by: Hannes Reinecke +Reviewed-by: Tomas Henzl +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/megaraid/megaraid_sas_fusion.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c ++++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c +@@ -3287,7 +3287,7 @@ build_mpt_mfi_pass_thru(struct megasas_i + mpi25_ieee_chain->Flags = IEEE_SGE_FLAGS_CHAIN_ELEMENT | + MPI2_IEEE_SGE_FLAGS_IOCPLBNTA_ADDR; + +- mpi25_ieee_chain->Length = cpu_to_le32(instance->max_chain_frame_sz); ++ mpi25_ieee_chain->Length = cpu_to_le32(instance->mfi_frame_size); + } + + /** diff --git a/queue-4.13/scsi-megaraid_sas-return-pended-ioctls-with-cmd_status-mfi_stat_wrong_state-in-case-adapter-is-dead.patch b/queue-4.13/scsi-megaraid_sas-return-pended-ioctls-with-cmd_status-mfi_stat_wrong_state-in-case-adapter-is-dead.patch new file mode 100644 index 00000000000..94cb4400ed6 --- /dev/null +++ b/queue-4.13/scsi-megaraid_sas-return-pended-ioctls-with-cmd_status-mfi_stat_wrong_state-in-case-adapter-is-dead.patch @@ -0,0 +1,40 @@ +From eb3fe263a48b0d27b229c213929c4cb3b1b39a0f Mon Sep 17 00:00:00 2001 +From: Shivasharan S +Date: Wed, 23 Aug 2017 04:47:04 -0700 +Subject: scsi: megaraid_sas: Return pended IOCTLs with cmd_status MFI_STAT_WRONG_STATE in case adapter is dead + +From: Shivasharan S + +commit eb3fe263a48b0d27b229c213929c4cb3b1b39a0f upstream. + +After a kill adapter, since the cmd_status is not set, the IOCTLs will +be hung in driver resulting in application hang. Set cmd_status +MFI_STAT_WRONG_STATE when completing pended IOCTLs. + +Signed-off-by: Kashyap Desai +Signed-off-by: Shivasharan S +Reviewed-by: Hannes Reinecke +Reviewed-by: Tomas Henzl +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/megaraid/megaraid_sas_base.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/scsi/megaraid/megaraid_sas_base.c ++++ b/drivers/scsi/megaraid/megaraid_sas_base.c +@@ -1995,9 +1995,12 @@ static void megasas_complete_outstanding + if (cmd_fusion->sync_cmd_idx != (u32)ULONG_MAX) { + cmd_mfi = instance->cmd_list[cmd_fusion->sync_cmd_idx]; + if (cmd_mfi->sync_cmd && +- cmd_mfi->frame->hdr.cmd != MFI_CMD_ABORT) ++ (cmd_mfi->frame->hdr.cmd != MFI_CMD_ABORT)) { ++ cmd_mfi->frame->hdr.cmd_status = ++ MFI_STAT_WRONG_STATE; + megasas_complete_cmd(instance, + cmd_mfi, DID_OK); ++ } + } + } + } else { diff --git a/queue-4.13/scsi-megaraid_sas-set-minimum-value-of-resetwaittime-to-be-1-secs.patch b/queue-4.13/scsi-megaraid_sas-set-minimum-value-of-resetwaittime-to-be-1-secs.patch new file mode 100644 index 00000000000..c08676aa3ff --- /dev/null +++ b/queue-4.13/scsi-megaraid_sas-set-minimum-value-of-resetwaittime-to-be-1-secs.patch @@ -0,0 +1,35 @@ +From e636a7a430f41efb0ff2727960ce61ef9f8f6769 Mon Sep 17 00:00:00 2001 +From: Shivasharan S +Date: Wed, 23 Aug 2017 04:46:56 -0700 +Subject: scsi: megaraid_sas: set minimum value of resetwaittime to be 1 secs + +From: Shivasharan S + +commit e636a7a430f41efb0ff2727960ce61ef9f8f6769 upstream. + +Setting resetwaittime to 0 during a FW fault will result in driver not +calling the OCR. + +Signed-off-by: Kashyap Desai +Signed-off-by: Shivasharan S +Reviewed-by: Hannes Reinecke +Reviewed-by: Tomas Henzl +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/megaraid/megaraid_sas_base.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/scsi/megaraid/megaraid_sas_base.c ++++ b/drivers/scsi/megaraid/megaraid_sas_base.c +@@ -5478,7 +5478,8 @@ static int megasas_init_fw(struct megasa + instance->throttlequeuedepth = + MEGASAS_THROTTLE_QUEUE_DEPTH; + +- if (resetwaittime > MEGASAS_RESET_WAIT_TIME) ++ if ((resetwaittime < 1) || ++ (resetwaittime > MEGASAS_RESET_WAIT_TIME)) + resetwaittime = MEGASAS_RESET_WAIT_TIME; + + if ((scmd_timeout < 10) || (scmd_timeout > MEGASAS_DEFAULT_CMD_TIMEOUT)) diff --git a/queue-4.13/scsi-qedi-off-by-one-in-qedi_get_cmd_from_tid.patch b/queue-4.13/scsi-qedi-off-by-one-in-qedi_get_cmd_from_tid.patch new file mode 100644 index 00000000000..1883247b1cf --- /dev/null +++ b/queue-4.13/scsi-qedi-off-by-one-in-qedi_get_cmd_from_tid.patch @@ -0,0 +1,34 @@ +From fa2d9d6e894e096678a50ef0f65f7a8c3d8a40b8 Mon Sep 17 00:00:00 2001 +From: Dan Carpenter +Date: Fri, 25 Aug 2017 13:36:57 +0300 +Subject: scsi: qedi: off by one in qedi_get_cmd_from_tid() + +From: Dan Carpenter + +commit fa2d9d6e894e096678a50ef0f65f7a8c3d8a40b8 upstream. + +The > here should be >= or we end up reading one element beyond the end +of the qedi->itt_map[] array. The qedi->itt_map[] array is allocated in +qedi_alloc_itt(). + +Fixes: ace7f46ba5fd ("scsi: qedi: Add QLogic FastLinQ offload iSCSI driver framework.") +Signed-off-by: Dan Carpenter +Acked-by: Manish Rangankar +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/qedi/qedi_main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/scsi/qedi/qedi_main.c ++++ b/drivers/scsi/qedi/qedi_main.c +@@ -1575,7 +1575,7 @@ struct qedi_cmd *qedi_get_cmd_from_tid(s + { + struct qedi_cmd *cmd = NULL; + +- if (tid > MAX_ISCSI_TASK_ENTRIES) ++ if (tid >= MAX_ISCSI_TASK_ENTRIES) + return NULL; + + cmd = qedi->itt_map[tid].p_cmd; diff --git a/queue-4.13/scsi-sg-factor-out-sg_fill_request_table.patch b/queue-4.13/scsi-sg-factor-out-sg_fill_request_table.patch new file mode 100644 index 00000000000..b05c36be57f --- /dev/null +++ b/queue-4.13/scsi-sg-factor-out-sg_fill_request_table.patch @@ -0,0 +1,106 @@ +From 4759df905a474d245752c9dc94288e779b8734dd Mon Sep 17 00:00:00 2001 +From: Hannes Reinecke +Date: Fri, 15 Sep 2017 14:05:15 +0200 +Subject: scsi: sg: factor out sg_fill_request_table() + +From: Hannes Reinecke + +commit 4759df905a474d245752c9dc94288e779b8734dd upstream. + +Factor out sg_fill_request_table() for better readability. + +[mkp: typos, applied by hand] + +Signed-off-by: Hannes Reinecke +Reviewed-by: Bart Van Assche +Reviewed-by: Christoph Hellwig +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/sg.c | 61 ++++++++++++++++++++++++++++++------------------------ + 1 file changed, 35 insertions(+), 26 deletions(-) + +--- a/drivers/scsi/sg.c ++++ b/drivers/scsi/sg.c +@@ -828,6 +828,40 @@ static int max_sectors_bytes(struct requ + return max_sectors << 9; + } + ++static void ++sg_fill_request_table(Sg_fd *sfp, sg_req_info_t *rinfo) ++{ ++ Sg_request *srp; ++ int val; ++ unsigned int ms; ++ ++ val = 0; ++ list_for_each_entry(srp, &sfp->rq_list, entry) { ++ if (val >= SG_MAX_QUEUE) /* rinfo[] holds SG_MAX_QUEUE entries */ ++ break; ++ memset(&rinfo[val], 0, SZ_SG_REQ_INFO); ++ rinfo[val].req_state = srp->done + 1; ++ rinfo[val].problem = ++ srp->header.masked_status & ++ srp->header.host_status & ++ srp->header.driver_status; ++ if (srp->done) ++ rinfo[val].duration = ++ srp->header.duration; ++ else { ++ ms = jiffies_to_msecs(jiffies); ++ rinfo[val].duration = ++ (ms > srp->header.duration) ? ++ (ms - srp->header.duration) : 0; ++ } ++ rinfo[val].orphan = srp->orphan; ++ rinfo[val].sg_io_owned = srp->sg_io_owned; ++ rinfo[val].pack_id = srp->header.pack_id; ++ rinfo[val].usr_ptr = srp->header.usr_ptr; ++ val++; ++ } ++} ++ + static long + sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) + { +@@ -1012,38 +1046,13 @@ sg_ioctl(struct file *filp, unsigned int + return -EFAULT; + else { + sg_req_info_t *rinfo; +- unsigned int ms; + + rinfo = kmalloc(SZ_SG_REQ_INFO * SG_MAX_QUEUE, + GFP_KERNEL); + if (!rinfo) + return -ENOMEM; + read_lock_irqsave(&sfp->rq_list_lock, iflags); +- val = 0; +- list_for_each_entry(srp, &sfp->rq_list, entry) { +- if (val >= SG_MAX_QUEUE) +- break; +- memset(&rinfo[val], 0, SZ_SG_REQ_INFO); +- rinfo[val].req_state = srp->done + 1; +- rinfo[val].problem = +- srp->header.masked_status & +- srp->header.host_status & +- srp->header.driver_status; +- if (srp->done) +- rinfo[val].duration = +- srp->header.duration; +- else { +- ms = jiffies_to_msecs(jiffies); +- rinfo[val].duration = +- (ms > 
srp->header.duration) ? +- (ms - srp->header.duration) : 0; +- } +- rinfo[val].orphan = srp->orphan; +- rinfo[val].sg_io_owned = srp->sg_io_owned; +- rinfo[val].pack_id = srp->header.pack_id; +- rinfo[val].usr_ptr = srp->header.usr_ptr; +- val++; +- } ++ sg_fill_request_table(sfp, rinfo); + read_unlock_irqrestore(&sfp->rq_list_lock, iflags); + result = __copy_to_user(p, rinfo, + SZ_SG_REQ_INFO * SG_MAX_QUEUE); diff --git a/queue-4.13/scsi-storvsc-fix-memory-leak-on-ring-buffer-busy.patch b/queue-4.13/scsi-storvsc-fix-memory-leak-on-ring-buffer-busy.patch new file mode 100644 index 00000000000..46a3469d57f --- /dev/null +++ b/queue-4.13/scsi-storvsc-fix-memory-leak-on-ring-buffer-busy.patch @@ -0,0 +1,38 @@ +From 0208eeaa650c5c866a3242201678a19e6dc4a14e Mon Sep 17 00:00:00 2001 +From: Long Li +Date: Mon, 28 Aug 2017 17:43:59 -0700 +Subject: scsi: storvsc: fix memory leak on ring buffer busy + +From: Long Li + +commit 0208eeaa650c5c866a3242201678a19e6dc4a14e upstream. + +When storvsc is sending I/O to Hyper-v, it may allocate a bigger buffer +descriptor for large data payload that can't fit into a pre-allocated +buffer descriptor. This bigger buffer is freed on return path. + +If I/O request to Hyper-v fails due to ring buffer busy, the storvsc +allocated buffer descriptor should also be freed. + +[mkp: applied by hand] + +Fixes: be0cf6ca301c ("scsi: storvsc: Set the tablesize based on the information given by the host") +Signed-off-by: Long Li +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/scsi/storvsc_drv.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/scsi/storvsc_drv.c ++++ b/drivers/scsi/storvsc_drv.c +@@ -1640,6 +1640,8 @@ static int storvsc_queuecommand(struct S + put_cpu(); + + if (ret == -EAGAIN) { ++ if (payload_sz > sizeof(cmd_request->mpb)) ++ kfree(payload); + /* no more space */ + return SCSI_MLQUEUE_DEVICE_BUSY; + } diff --git a/queue-4.13/scsi-zfcp-add-handling-for-fcp_resid_over-to-the-fcp-ingress-path.patch b/queue-4.13/scsi-zfcp-add-handling-for-fcp_resid_over-to-the-fcp-ingress-path.patch new file mode 100644 index 00000000000..db1ab46be11 --- /dev/null +++ b/queue-4.13/scsi-zfcp-add-handling-for-fcp_resid_over-to-the-fcp-ingress-path.patch @@ -0,0 +1,90 @@ +From a099b7b1fc1f0418ab8d79ecf98153e1e134656e Mon Sep 17 00:00:00 2001 +From: Benjamin Block +Date: Fri, 28 Jul 2017 12:30:52 +0200 +Subject: scsi: zfcp: add handling for FCP_RESID_OVER to the fcp ingress path + +From: Benjamin Block + +commit a099b7b1fc1f0418ab8d79ecf98153e1e134656e upstream. + +Up until now zfcp would just ignore the FCP_RESID_OVER flag in the FCP +response IU. When this flag is set, it is possible, in regards to the +FCP standard, that the storage-server processes the command normally, up +to the point where data is missing and simply ignores those. + +In this case no CHECK CONDITION would be set, and because we ignored the +FCP_RESID_OVER flag we resulted in at least a data loss or even +-corruption as a follow-up error, depending on how the +applications/layers on top behave. To prevent this, we now set the +host-byte of the corresponding scsi_cmnd to DID_ERROR. + +Other storage-behaviors, where the same condition results in a CHECK +CONDITION set in the answer, don't need to be changed as they are +handled in the mid-layer already. + +Following is an example trace record decoded with zfcpdbf from the +s390-tools package. 
We forcefully injected a fc_dl which is one byte too +small: + +Timestamp : ... +Area : SCSI +Subarea : 00 +Level : 3 +Exception : - +CPU ID : .. +Caller : 0x... +Record ID : 1 +Tag : rsl_err +Request ID : 0x... +SCSI ID : 0x... +SCSI LUN : 0x... +SCSI result : 0x00070000 + ^^DID_ERROR +SCSI retries : 0x.. +SCSI allowed : 0x.. +SCSI scribble : 0x... +SCSI opcode : 2a000000 00000000 08000000 00000000 +FCP rsp inf cod: 0x00 +FCP rsp IU : 00000000 00000000 00000400 00000001 + ^^fr_flags==FCP_RESID_OVER + ^^fr_status==SAM_STAT_GOOD + ^^^^^^^^fr_resid + 00000000 00000000 + +As of now, we don't actively handle the possibility that a response IU +has both flags - FCP_RESID_OVER and FCP_RESID_UNDER - set at once. + +Reported-by: Luke M. Hopkins +Reviewed-by: Steffen Maier +Fixes: 553448f6c483 ("[SCSI] zfcp: Message cleanup") +Fixes: ea127f975424 ("[PATCH] s390 (7/7): zfcp host adapter.") (tglx/history.git) +Signed-off-by: Benjamin Block +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/s390/scsi/zfcp_fc.h | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/s390/scsi/zfcp_fc.h ++++ b/drivers/s390/scsi/zfcp_fc.h +@@ -4,7 +4,7 @@ + * Fibre Channel related definitions and inline functions for the zfcp + * device driver + * +- * Copyright IBM Corp. 2009 ++ * Copyright IBM Corp. 
2009, 2017 + */ + + #ifndef ZFCP_FC_H +@@ -279,6 +279,10 @@ void zfcp_fc_eval_fcp_rsp(struct fcp_res + !(rsp_flags & FCP_SNS_LEN_VAL) && + fcp_rsp->resp.fr_status == SAM_STAT_GOOD) + set_host_byte(scsi, DID_ERROR); ++ } else if (unlikely(rsp_flags & FCP_RESID_OVER)) { ++ /* FCP_DL was not sufficient for SCSI data length */ ++ if (fcp_rsp->resp.fr_status == SAM_STAT_GOOD) ++ set_host_byte(scsi, DID_ERROR); + } + } + diff --git a/queue-4.13/scsi-zfcp-fix-capping-of-unsuccessful-gpn_ft-san-response-trace-records.patch b/queue-4.13/scsi-zfcp-fix-capping-of-unsuccessful-gpn_ft-san-response-trace-records.patch new file mode 100644 index 00000000000..41c2ddcbbd0 --- /dev/null +++ b/queue-4.13/scsi-zfcp-fix-capping-of-unsuccessful-gpn_ft-san-response-trace-records.patch @@ -0,0 +1,149 @@ +From 975171b4461be296a35e83ebd748946b81cf0635 Mon Sep 17 00:00:00 2001 +From: Steffen Maier +Date: Fri, 28 Jul 2017 12:30:53 +0200 +Subject: scsi: zfcp: fix capping of unsuccessful GPN_FT SAN response trace records + +From: Steffen Maier + +commit 975171b4461be296a35e83ebd748946b81cf0635 upstream. + +v4.9 commit aceeffbb59bb ("zfcp: trace full payload of all SAN records +(req,resp,iels)") fixed trace data loss of 2.6.38 commit 2c55b750a884 +("[SCSI] zfcp: Redesign of the debug tracing for SAN records.") +necessary for problem determination, e.g. to see the +currently active zone set during automatic port scan. + +While it already saves space by not dumping any empty residual entries +of the large successful GPN_FT response (4 pages), there are seldom cases +where the GPN_FT response is unsuccessful and likely does not have +FC_NS_FID_LAST set in fp_flags so we did not cap the trace record. +We typically see such case for an initiator WWPN, which is not in any zone. 
+ +Cap unsuccessful responses to at least the actual basic CT_IU response +plus whatever fits the SAN trace record built-in "payload" buffer +just in case there's trailing information +of which we would at least see the existence and its beginning. + +In order not to erroneously cap successful responses, we need to swap +calling the trace function and setting the CT / ELS status to success (0). + +Example trace record pair formatted with zfcpdbf: + +Timestamp : ... +Area : SAN +Subarea : 00 +Level : 1 +Exception : - +CPU ID : .. +Caller : 0x... +Record ID : 1 +Tag : fssct_1 +Request ID : 0x +Destination ID : 0x00fffffc +SAN req short : 01000000 fc020000 01720ffc 00000000 + 00000008 +SAN req length : 20 +| +Timestamp : ... +Area : SAN +Subarea : 00 +Level : 1 +Exception : - +CPU ID : .. +Caller : 0x... +Record ID : 2 +Tag : fsscth2 +Request ID : 0x +Destination ID : 0x00fffffc +SAN resp short : 01000000 fc020000 80010000 00090700 + 00000000 00000000 00000000 00000000 [trailing info] + 00000000 00000000 00000000 00000000 [trailing info] +SAN resp length: 16384 +San resp info : 01000000 fc020000 80010000 00090700 + 00000000 00000000 00000000 00000000 [trailing info] + 00000000 00000000 00000000 00000000 [trailing info] + 00000000 00000000 00000000 00000000 [trailing info] + 00000000 00000000 00000000 00000000 [trailing info] + 00000000 00000000 00000000 00000000 [trailing info] + 00000000 00000000 00000000 00000000 [trailing info] + 00000000 00000000 00000000 00000000 [trailing info] + 00000000 00000000 00000000 00000000 [trailing info] + 00000000 00000000 00000000 00000000 [trailing info] + 00000000 00000000 00000000 00000000 [trailing info] + 00000000 00000000 00000000 00000000 [trailing info] + 00000000 00000000 00000000 00000000 [trailing info] + 00000000 00000000 00000000 00000000 [trailing info] + 00000000 00000000 00000000 00000000 [trailing info] + 00000000 00000000 00000000 00000000 [trailing info] + +The fix saves all but one of the previously associated 64 
PAYload trace +record chunks of size 256 bytes each. + +Signed-off-by: Steffen Maier +Fixes: aceeffbb59bb ("zfcp: trace full payload of all SAN records (req,resp,iels)") +Fixes: 2c55b750a884 ("[SCSI] zfcp: Redesign of the debug tracing for SAN records.") +Reviewed-by: Benjamin Block +Signed-off-by: Benjamin Block +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/s390/scsi/zfcp_dbf.c | 10 +++++++++- + drivers/s390/scsi/zfcp_fsf.c | 4 ++-- + 2 files changed, 11 insertions(+), 3 deletions(-) + +--- a/drivers/s390/scsi/zfcp_dbf.c ++++ b/drivers/s390/scsi/zfcp_dbf.c +@@ -3,7 +3,7 @@ + * + * Debug traces for zfcp. + * +- * Copyright IBM Corp. 2002, 2016 ++ * Copyright IBM Corp. 2002, 2017 + */ + + #define KMSG_COMPONENT "zfcp" +@@ -447,6 +447,7 @@ static u16 zfcp_dbf_san_res_cap_len_if_g + struct fc_ct_hdr *reqh = sg_virt(ct_els->req); + struct fc_ns_gid_ft *reqn = (struct fc_ns_gid_ft *)(reqh + 1); + struct scatterlist *resp_entry = ct_els->resp; ++ struct fc_ct_hdr *resph; + struct fc_gpn_ft_resp *acc; + int max_entries, x, last = 0; + +@@ -473,6 +474,13 @@ static u16 zfcp_dbf_san_res_cap_len_if_g + return len; /* not GPN_FT response so do not cap */ + + acc = sg_virt(resp_entry); ++ ++ /* cap all but accept CT responses to at least the CT header */ ++ resph = (struct fc_ct_hdr *)acc; ++ if ((ct_els->status) || ++ (resph->ct_cmd != cpu_to_be16(FC_FS_ACC))) ++ return max(FC_CT_HDR_LEN, ZFCP_DBF_SAN_MAX_PAYLOAD); ++ + max_entries = (reqh->ct_mr_size * 4 / sizeof(struct fc_gpn_ft_resp)) + + 1 /* zfcp_fc_scan_ports: bytes correct, entries off-by-one + * to account for header as 1st pseudo "entry" */; +--- a/drivers/s390/scsi/zfcp_fsf.c ++++ b/drivers/s390/scsi/zfcp_fsf.c +@@ -928,8 +928,8 @@ static void zfcp_fsf_send_ct_handler(str + + switch (header->fsf_status) { + case FSF_GOOD: +- zfcp_dbf_san_res("fsscth2", req); + ct->status = 0; ++ zfcp_dbf_san_res("fsscth2", req); + break; + case FSF_SERVICE_CLASS_NOT_SUPPORTED: + 
zfcp_fsf_class_not_supp(req); +@@ -1109,8 +1109,8 @@ static void zfcp_fsf_send_els_handler(st + + switch (header->fsf_status) { + case FSF_GOOD: +- zfcp_dbf_san_res("fsselh1", req); + send_els->status = 0; ++ zfcp_dbf_san_res("fsselh1", req); + break; + case FSF_SERVICE_CLASS_NOT_SUPPORTED: + zfcp_fsf_class_not_supp(req); diff --git a/queue-4.13/scsi-zfcp-fix-missing-trace-records-for-early-returns-in-tmf-eh-handlers.patch b/queue-4.13/scsi-zfcp-fix-missing-trace-records-for-early-returns-in-tmf-eh-handlers.patch new file mode 100644 index 00000000000..357b96bb502 --- /dev/null +++ b/queue-4.13/scsi-zfcp-fix-missing-trace-records-for-early-returns-in-tmf-eh-handlers.patch @@ -0,0 +1,62 @@ +From 1a5d999ebfc7bfe28deb48931bb57faa8e4102b6 Mon Sep 17 00:00:00 2001 +From: Steffen Maier +Date: Fri, 28 Jul 2017 12:30:55 +0200 +Subject: scsi: zfcp: fix missing trace records for early returns in TMF eh handlers + +From: Steffen Maier + +commit 1a5d999ebfc7bfe28deb48931bb57faa8e4102b6 upstream. + +For problem determination we need to see that we were in scsi_eh +as well as whether and why we were successful or not. + +The following commits introduced new early returns without adding +a trace record: + +v2.6.35 commit a1dbfddd02d2 +("[SCSI] zfcp: Pass return code from fc_block_scsi_eh to scsi eh") +on fc_block_scsi_eh() returning != 0 which is FAST_IO_FAIL, + +v2.6.30 commit 63caf367e1c9 +("[SCSI] zfcp: Improve reliability of SCSI eh handlers in zfcp") +on not having gotten an FSF request after the maximum number of retry +attempts and thus could not issue a TMF and has to return FAILED. + +Signed-off-by: Steffen Maier +Fixes: a1dbfddd02d2 ("[SCSI] zfcp: Pass return code from fc_block_scsi_eh to scsi eh") +Fixes: 63caf367e1c9 ("[SCSI] zfcp: Improve reliability of SCSI eh handlers in zfcp") +Reviewed-by: Benjamin Block +Signed-off-by: Benjamin Block +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/s390/scsi/zfcp_scsi.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/drivers/s390/scsi/zfcp_scsi.c ++++ b/drivers/s390/scsi/zfcp_scsi.c +@@ -273,8 +273,10 @@ static int zfcp_task_mgmt_function(struc + + zfcp_erp_wait(adapter); + ret = fc_block_scsi_eh(scpnt); +- if (ret) ++ if (ret) { ++ zfcp_dbf_scsi_devreset("fiof", scpnt, tm_flags, NULL); + return ret; ++ } + + if (!(atomic_read(&adapter->status) & + ZFCP_STATUS_COMMON_RUNNING)) { +@@ -282,8 +284,10 @@ static int zfcp_task_mgmt_function(struc + return SUCCESS; + } + } +- if (!fsf_req) ++ if (!fsf_req) { ++ zfcp_dbf_scsi_devreset("reqf", scpnt, tm_flags, NULL); + return FAILED; ++ } + + wait_for_completion(&fsf_req->completion); + diff --git a/queue-4.13/scsi-zfcp-fix-passing-fsf_req-to-scsi-trace-on-tmf-to-correlate-with-hba.patch b/queue-4.13/scsi-zfcp-fix-passing-fsf_req-to-scsi-trace-on-tmf-to-correlate-with-hba.patch new file mode 100644 index 00000000000..ba88a760d13 --- /dev/null +++ b/queue-4.13/scsi-zfcp-fix-passing-fsf_req-to-scsi-trace-on-tmf-to-correlate-with-hba.patch @@ -0,0 +1,136 @@ +From 9fe5d2b2fd30aa8c7827ec62cbbe6d30df4fe3e3 Mon Sep 17 00:00:00 2001 +From: Steffen Maier +Date: Fri, 28 Jul 2017 12:30:54 +0200 +Subject: scsi: zfcp: fix passing fsf_req to SCSI trace on TMF to correlate with HBA + +From: Steffen Maier + +commit 9fe5d2b2fd30aa8c7827ec62cbbe6d30df4fe3e3 upstream. + +Without this fix we get SCSI trace records on task management functions +which cannot be correlated to HBA trace records because all fields +related to the FSF request are empty (zero). +Also, the FCP_RSP_IU is missing as well as any sense data if available. + +This was caused by v2.6.14 commit 8a36e4532ea1 ("[SCSI] zfcp: enhancement +of zfcp debug features") introducing trace records for TMFs but +hard coding NULL for a possibly existing TMF FSF request. 
+The scsi_cmnd scribble is also zero or unrelated for the TMF request +so it also could not lookup a suitable FSF request from there. + +A broken example trace record formatted with zfcpdbf from the s390-tools +package: + +Timestamp : ... +Area : SCSI +Subarea : 00 +Level : 1 +Exception : - +CPU ID : .. +Caller : 0x... +Record ID : 1 +Tag : lr_fail +Request ID : 0x0000000000000000 + ^^^^^^^^^^^^^^^^ no correlation to HBA record +SCSI ID : 0x +SCSI LUN : 0x +SCSI result : 0x000e0000 +SCSI retries : 0x00 +SCSI allowed : 0x05 +SCSI scribble : 0x0000000000000000 +SCSI opcode : 2a000017 3bb80000 08000000 00000000 +FCP rsp inf cod: 0x00 + ^^ no TMF response +FCP rsp IU : 00000000 00000000 00000000 00000000 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + 00000000 00000000 + ^^^^^^^^^^^^^^^^^ no interesting FCP_RSP_IU +Sense len : ... +^^^^^^^^^^^^^^^^^^^^ no sense data length +Sense info : ... +^^^^^^^^^^^^^^^^^^^^ no sense data content, even if present + +There are some true cases where we really do not have an FSF request: +"rsl_fai" from zfcp_dbf_scsi_fail_send() called for early +returns / completions in zfcp_scsi_queuecommand(), +"abrt_or", "abrt_bl", "abrt_ru", "abrt_ar" from +zfcp_scsi_eh_abort_handler() where we did not get as far, +"lr_nres", "tr_nres" from zfcp_task_mgmt_function() where we're +successful and do not need to do anything because adapter stopped. +For these cases it's correct to pass NULL for fsf_req to _zfcp_dbf_scsi(). + +Signed-off-by: Steffen Maier +Fixes: 8a36e4532ea1 ("[SCSI] zfcp: enhancement of zfcp debug features") +Reviewed-by: Benjamin Block +Signed-off-by: Benjamin Block +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/s390/scsi/zfcp_dbf.h | 7 ++++--- + drivers/s390/scsi/zfcp_scsi.c | 8 ++++---- + 2 files changed, 8 insertions(+), 7 deletions(-) + +--- a/drivers/s390/scsi/zfcp_dbf.h ++++ b/drivers/s390/scsi/zfcp_dbf.h +@@ -2,7 +2,7 @@ + * zfcp device driver + * debug feature declarations + * +- * Copyright IBM Corp. 2008, 2016 ++ * Copyright IBM Corp. 2008, 2017 + */ + + #ifndef ZFCP_DBF_H +@@ -401,7 +401,8 @@ void zfcp_dbf_scsi_abort(char *tag, stru + * @flag: indicates type of reset (Target Reset, Logical Unit Reset) + */ + static inline +-void zfcp_dbf_scsi_devreset(char *tag, struct scsi_cmnd *scmnd, u8 flag) ++void zfcp_dbf_scsi_devreset(char *tag, struct scsi_cmnd *scmnd, u8 flag, ++ struct zfcp_fsf_req *fsf_req) + { + char tmp_tag[ZFCP_DBF_TAG_LEN]; + +@@ -411,7 +412,7 @@ void zfcp_dbf_scsi_devreset(char *tag, s + memcpy(tmp_tag, "lr_", 3); + + memcpy(&tmp_tag[3], tag, 4); +- _zfcp_dbf_scsi(tmp_tag, 1, scmnd, NULL); ++ _zfcp_dbf_scsi(tmp_tag, 1, scmnd, fsf_req); + } + + /** +--- a/drivers/s390/scsi/zfcp_scsi.c ++++ b/drivers/s390/scsi/zfcp_scsi.c +@@ -3,7 +3,7 @@ + * + * Interface to Linux SCSI midlayer. + * +- * Copyright IBM Corp. 2002, 2016 ++ * Copyright IBM Corp. 
2002, 2017 + */ + + #define KMSG_COMPONENT "zfcp" +@@ -278,7 +278,7 @@ static int zfcp_task_mgmt_function(struc + + if (!(atomic_read(&adapter->status) & + ZFCP_STATUS_COMMON_RUNNING)) { +- zfcp_dbf_scsi_devreset("nres", scpnt, tm_flags); ++ zfcp_dbf_scsi_devreset("nres", scpnt, tm_flags, NULL); + return SUCCESS; + } + } +@@ -288,10 +288,10 @@ static int zfcp_task_mgmt_function(struc + wait_for_completion(&fsf_req->completion); + + if (fsf_req->status & ZFCP_STATUS_FSFREQ_TMFUNCFAILED) { +- zfcp_dbf_scsi_devreset("fail", scpnt, tm_flags); ++ zfcp_dbf_scsi_devreset("fail", scpnt, tm_flags, fsf_req); + retval = FAILED; + } else { +- zfcp_dbf_scsi_devreset("okay", scpnt, tm_flags); ++ zfcp_dbf_scsi_devreset("okay", scpnt, tm_flags, fsf_req); + zfcp_scsi_forget_cmnds(zfcp_sdev, tm_flags); + } + diff --git a/queue-4.13/scsi-zfcp-fix-payload-with-full-fcp_rsp-iu-in-scsi-trace-records.patch b/queue-4.13/scsi-zfcp-fix-payload-with-full-fcp_rsp-iu-in-scsi-trace-records.patch new file mode 100644 index 00000000000..7d4ef72e0e6 --- /dev/null +++ b/queue-4.13/scsi-zfcp-fix-payload-with-full-fcp_rsp-iu-in-scsi-trace-records.patch @@ -0,0 +1,189 @@ +From 12c3e5754c8022a4f2fd1e9f00d19e99ee0d3cc1 Mon Sep 17 00:00:00 2001 +From: Steffen Maier +Date: Fri, 28 Jul 2017 12:30:56 +0200 +Subject: scsi: zfcp: fix payload with full FCP_RSP IU in SCSI trace records + +From: Steffen Maier + +commit 12c3e5754c8022a4f2fd1e9f00d19e99ee0d3cc1 upstream. + +If the FCP_RSP IU has optional parts (FCP_SNS_INFO or FCP_RSP_INFO) and +thus does not fit into the fcp_rsp field built into a SCSI trace record, +trace the full FCP_RSP IU with all optional parts as payload record +instead of just FCP_SNS_INFO as payload and +a 1 byte RSP_INFO_CODE part of FCP_RSP_INFO built into the SCSI record. + +That way we would also get the full FCP_SNS_INFO in case a +target would ever send more than +min(SCSI_SENSE_BUFFERSIZE==96, ZFCP_DBF_PAY_MAX_REC==256)==96. + +The mandatory part of FCP_RSP IU is only 24 bytes.
+PAYload costs at least one full PAY record of 256 bytes anyway. +We cap to the hardware response size which is only FSF_FCP_RSP_SIZE==128. +So we can just put the whole FCP_RSP IU with any optional parts into +PAYload similarly as we do for SAN PAY since v4.9 commit aceeffbb59bb +("zfcp: trace full payload of all SAN records (req,resp,iels)"). +This does not cause any additional trace records wasting memory. + +Decoded trace records were confusing because they showed a hard-coded +sense data length of 96 even if the FCP_RSP_IU field FCP_SNS_LEN showed +actually less. + +Since the same commit, we set pl_len for SAN traces to the full length of a +request/response even if we cap the corresponding trace. +In contrast, here for SCSI traces we set pl_len to the pre-computed +length of FCP_RSP IU considering SNS_LEN or RSP_LEN if valid. +Nonetheless we trace a hardcoded payload of length FSF_FCP_RSP_SIZE==128 +if there were optional parts. +This makes it easier for the zfcpdbf tool to format only the relevant +part of the long FCP_RSP IU buffer. And any trailing information is still +available in the payload trace record just in case. + +Rename the payload record tag from "fcp_sns" to "fcp_riu" to make the new +content explicit to zfcpdbf which can then pick a suitable field name such +as "FCP rsp IU all:" instead of "Sense info :" +Also, the same zfcpdbf can still be backwards compatible with "fcp_sns". + +Old example trace record before this fix, formatted with the tool zfcpdbf +from s390-tools: + +Timestamp : ... +Area : SCSI +Subarea : 00 +Level : 3 +Exception : - +CPU id : .. +Caller : 0x... +Record id : 1 +Tag : rsl_err +Request id : 0x +SCSI ID : 0x... +SCSI LUN : 0x...
+SCSI result : 0x00000002 +SCSI retries : 0x00 +SCSI allowed : 0x05 +SCSI scribble : 0x +SCSI opcode : 00000000 00000000 00000000 00000000 +FCP rsp inf cod: 0x00 +FCP rsp IU : 00000000 00000000 00000202 00000000 + ^^==FCP_SNS_LEN_VALID + 00000020 00000000 + ^^^^^^^^==FCP_SNS_LEN==32 +Sense len : 96 <==min(SCSI_SENSE_BUFFERSIZE,ZFCP_DBF_PAY_MAX_REC) +Sense info : 70000600 00000018 00000000 29000000 + 00000400 00000000 00000000 00000000 + 00000000 00000000 00000000 00000000<==superfluous + 00000000 00000000 00000000 00000000<==superfluous + 00000000 00000000 00000000 00000000<==superfluous + 00000000 00000000 00000000 00000000<==superfluous + +New example trace records with this fix: + +Timestamp : ... +Area : SCSI +Subarea : 00 +Level : 3 +Exception : - +CPU ID : .. +Caller : 0x... +Record ID : 1 +Tag : rsl_err +Request ID : 0x +SCSI ID : 0x... +SCSI LUN : 0x... +SCSI result : 0x00000002 +SCSI retries : 0x00 +SCSI allowed : 0x03 +SCSI scribble : 0x +SCSI opcode : a30c0112 00000000 02000000 00000000 +FCP rsp inf cod: 0x00 +FCP rsp IU : 00000000 00000000 00000a02 00000200 + 00000020 00000000 +FCP rsp IU len : 56 +FCP rsp IU all : 00000000 00000000 00000a02 00000200 + ^^=FCP_RESID_UNDER|FCP_SNS_LEN_VALID + 00000020 00000000 70000500 00000018 + ^^^^^^^^==FCP_SNS_LEN + ^^^^^^^^^^^^^^^^^ + 00000000 240000cb 00011100 00000000 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + 00000000 00000000 + ^^^^^^^^^^^^^^^^^==FCP_SNS_INFO + +Timestamp : ... +Area : SCSI +Subarea : 00 +Level : 1 +Exception : - +CPU ID : .. +Caller : 0x... +Record ID : 1 +Tag : lr_okay +Request ID : 0x +SCSI ID : 0x... +SCSI LUN : 0x... 
+SCSI result : 0x00000000 +SCSI retries : 0x00 +SCSI allowed : 0x05 +SCSI scribble : 0x +SCSI opcode : +FCP rsp inf cod: 0x00 +FCP rsp IU : 00000000 00000000 00000100 00000000 + 00000000 00000008 +FCP rsp IU len : 32 +FCP rsp IU all : 00000000 00000000 00000100 00000000 + ^^==FCP_RSP_LEN_VALID + 00000000 00000008 00000000 00000000 + ^^^^^^^^==FCP_RSP_LEN + ^^^^^^^^^^^^^^^^^==FCP_RSP_INFO + +Signed-off-by: Steffen Maier +Fixes: 250a1352b95e ("[SCSI] zfcp: Redesign of the debug tracing for SCSI records.") +Reviewed-by: Benjamin Block +Signed-off-by: Benjamin Block +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/s390/scsi/zfcp_dbf.c | 21 +++++++++++++++++---- + 1 file changed, 17 insertions(+), 4 deletions(-) + +--- a/drivers/s390/scsi/zfcp_dbf.c ++++ b/drivers/s390/scsi/zfcp_dbf.c +@@ -572,19 +572,32 @@ void zfcp_dbf_scsi(char *tag, int level, + + if (fsf) { + rec->fsf_req_id = fsf->req_id; ++ rec->pl_len = FCP_RESP_WITH_EXT; + fcp_rsp = (struct fcp_resp_with_ext *) + &(fsf->qtcb->bottom.io.fcp_rsp); ++ /* mandatory parts of FCP_RSP IU in this SCSI record */ + memcpy(&rec->fcp_rsp, fcp_rsp, FCP_RESP_WITH_EXT); + if (fcp_rsp->resp.fr_flags & FCP_RSP_LEN_VAL) { + fcp_rsp_info = (struct fcp_resp_rsp_info *) &fcp_rsp[1]; + rec->fcp_rsp_info = fcp_rsp_info->rsp_code; ++ rec->pl_len += be32_to_cpu(fcp_rsp->ext.fr_rsp_len); + } + if (fcp_rsp->resp.fr_flags & FCP_SNS_LEN_VAL) { +- rec->pl_len = min((u16)SCSI_SENSE_BUFFERSIZE, +- (u16)ZFCP_DBF_PAY_MAX_REC); +- zfcp_dbf_pl_write(dbf, sc->sense_buffer, rec->pl_len, +- "fcp_sns", fsf->req_id); ++ rec->pl_len += be32_to_cpu(fcp_rsp->ext.fr_sns_len); + } ++ /* complete FCP_RSP IU in associated PAYload record ++ * but only if there are optional parts ++ */ ++ if (fcp_rsp->resp.fr_flags != 0) ++ zfcp_dbf_pl_write( ++ dbf, fcp_rsp, ++ /* at least one full PAY record ++ * but not beyond hardware response field ++ */ ++ min_t(u16, max_t(u16, rec->pl_len, ++ ZFCP_DBF_PAY_MAX_REC), ++ 
FSF_FCP_RSP_SIZE), ++ "fcp_riu", fsf->req_id); + } + + debug_event(dbf->scsi, level, rec, sizeof(*rec)); diff --git a/queue-4.13/scsi-zfcp-fix-queuecommand-for-scsi_eh-commands-when-dix-enabled.patch b/queue-4.13/scsi-zfcp-fix-queuecommand-for-scsi_eh-commands-when-dix-enabled.patch new file mode 100644 index 00000000000..69b19f16699 --- /dev/null +++ b/queue-4.13/scsi-zfcp-fix-queuecommand-for-scsi_eh-commands-when-dix-enabled.patch @@ -0,0 +1,59 @@ +From 71b8e45da51a7b64a23378221c0a5868bd79da4f Mon Sep 17 00:00:00 2001 +From: Steffen Maier +Date: Fri, 28 Jul 2017 12:30:51 +0200 +Subject: scsi: zfcp: fix queuecommand for scsi_eh commands when DIX enabled + +From: Steffen Maier + +commit 71b8e45da51a7b64a23378221c0a5868bd79da4f upstream. + +Since commit db007fc5e20c ("[SCSI] Command protection operation"), +scsi_eh_prep_cmnd() saves scmd->prot_op and temporarily resets it to +SCSI_PROT_NORMAL. +Other FCP LLDDs such as qla2xxx and lpfc shield their queuecommand() +to only access any of scsi_prot_sg...() if +(scsi_get_prot_op(cmd) != SCSI_PROT_NORMAL). + +Do the same thing for zfcp, which introduced DIX support with +commit ef3eb71d8ba4 ("[SCSI] zfcp: Introduce experimental support for +DIF/DIX"). + +Otherwise, TUR SCSI commands as part of scsi_eh likely fail in zfcp, +because the regular SCSI command with DIX protection data, that scsi_eh +re-uses in scsi_send_eh_cmnd(), of course still has +(scsi_prot_sg_count() != 0) and so zfcp sends down bogus requests to the +FCP channel hardware. + +This causes scsi_eh_test_devices() to have (finish_cmds == 0) +[not SCSI device is online or not scsi_eh_tur() failed] +so regular SCSI commands, that caused / were affected by scsi_eh, +are moved to work_q and scsi_eh_test_devices() itself returns false. 
+In turn, it unnecessarily escalates in our case in scsi_eh_ready_devs() +beyond host reset to finally scsi_eh_offline_sdevs() +which sets affected SCSI devices offline with the following kernel message: + +"kernel: sd H:0:T:L: Device offlined - not ready after error recovery" + +Signed-off-by: Steffen Maier +Fixes: ef3eb71d8ba4 ("[SCSI] zfcp: Introduce experimental support for DIF/DIX") +Reviewed-by: Benjamin Block +Signed-off-by: Benjamin Block +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/s390/scsi/zfcp_fsf.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/s390/scsi/zfcp_fsf.c ++++ b/drivers/s390/scsi/zfcp_fsf.c +@@ -2258,7 +2258,8 @@ int zfcp_fsf_fcp_cmnd(struct scsi_cmnd * + fcp_cmnd = (struct fcp_cmnd *) &req->qtcb->bottom.io.fcp_cmnd; + zfcp_fc_scsi_to_fcp(fcp_cmnd, scsi_cmnd, 0); + +- if (scsi_prot_sg_count(scsi_cmnd)) { ++ if ((scsi_get_prot_op(scsi_cmnd) != SCSI_PROT_NORMAL) && ++ scsi_prot_sg_count(scsi_cmnd)) { + zfcp_qdio_set_data_div(qdio, &req->qdio_req, + scsi_prot_sg_count(scsi_cmnd)); + retval = zfcp_qdio_sbals_from_sg(qdio, &req->qdio_req, diff --git a/queue-4.13/scsi-zfcp-trace-hba-fsf-response-by-default-on-dismiss-or-timedout-late-response.patch b/queue-4.13/scsi-zfcp-trace-hba-fsf-response-by-default-on-dismiss-or-timedout-late-response.patch new file mode 100644 index 00000000000..a2a0e913847 --- /dev/null +++ b/queue-4.13/scsi-zfcp-trace-hba-fsf-response-by-default-on-dismiss-or-timedout-late-response.patch @@ -0,0 +1,217 @@ +From fdb7cee3b9e3c561502e58137a837341f10cbf8b Mon Sep 17 00:00:00 2001 +From: Steffen Maier +Date: Fri, 28 Jul 2017 12:30:57 +0200 +Subject: scsi: zfcp: trace HBA FSF response by default on dismiss or timedout late response + +From: Steffen Maier + +commit fdb7cee3b9e3c561502e58137a837341f10cbf8b upstream. + +At the default trace level, we only trace unsuccessful events including +FSF responses. 
+ +zfcp_dbf_hba_fsf_response() only used protocol status and FSF status to +decide on an unsuccessful response. However, this is only one of multiple +possible sources determining a failed struct zfcp_fsf_req. + +An FSF request can also "fail" if its response runs into an ERP timeout +or if it gets dismissed because a higher level recovery was triggered +[trace tags "erscf_1" or "erscf_2" in zfcp_erp_strategy_check_fsfreq()]. +FSF requests with ERP timeout are: +FSF_QTCB_EXCHANGE_CONFIG_DATA, FSF_QTCB_EXCHANGE_PORT_DATA, +FSF_QTCB_OPEN_PORT_WITH_DID or FSF_QTCB_CLOSE_PORT or +FSF_QTCB_CLOSE_PHYSICAL_PORT for target ports, +FSF_QTCB_OPEN_LUN, FSF_QTCB_CLOSE_LUN. +One example is slow queue processing which can cause follow-on errors, +e.g. FSF_PORT_ALREADY_OPEN after FSF_QTCB_OPEN_PORT_WITH_DID timed out. +In order to see the root cause, we need to see late responses even if the +channel presented them successfully with FSF_PROT_GOOD and FSF_GOOD. +Example trace records formatted with zfcpdbf from the s390-tools package: + +Timestamp : ... +Area : REC +Subarea : 00 +Level : 1 +Exception : - +CPU ID : .. +Caller : ... +Record ID : 1 +Tag : fcegpf1 +LUN : 0xffffffffffffffff +WWPN : 0x +D_ID : 0x00 +Adapter status : 0x5400050b +Port status : 0x41200000 +LUN status : 0x00000000 +Ready count : 0x00000001 +Running count : 0x... +ERP want : 0x02 ZFCP_ERP_ACTION_REOPEN_PORT +ERP need : 0x02 ZFCP_ERP_ACTION_REOPEN_PORT +| +Timestamp : ... 30 seconds later +Area : REC +Subarea : 00 +Level : 1 +Exception : - +CPU ID : .. +Caller : ... +Record ID : 2 +Tag : erscf_2 +LUN : 0xffffffffffffffff +WWPN : 0x +D_ID : 0x00 +Adapter status : 0x5400050b +Port status : 0x41200000 +LUN status : 0x00000000 +Request ID : 0x +ERP status : 0x10000000 ZFCP_STATUS_ERP_TIMEDOUT +ERP step : 0x0800 ZFCP_ERP_STEP_PORT_OPENING +ERP action : 0x02 ZFCP_ERP_ACTION_REOPEN_PORT +ERP count : 0x00 +| +Timestamp : ... 
later than previous record +Area : HBA +Subarea : 00 +Level : 5 > default level => 3 <= default level +Exception : - +CPU ID : 00 +Caller : ... +Record ID : 1 +Tag : fs_qtcb => fs_rerr +Request ID : 0x +Request status : 0x00001010 ZFCP_STATUS_FSFREQ_DISMISSED + | ZFCP_STATUS_FSFREQ_CLEANUP +FSF cmnd : 0x00000005 +FSF sequence no: 0x... +FSF issued : ... > 30 seconds ago +FSF stat : 0x00000000 FSF_GOOD +FSF stat qual : 00000000 00000000 00000000 00000000 +Prot stat : 0x00000001 FSF_PROT_GOOD +Prot stat qual : 00000000 00000000 00000000 00000000 +Port handle : 0x... +LUN handle : 0x00000000 +QTCB log length: ... +QTCB log info : ... + +In case of problems detecting that new responses are waiting on the input +queue, we sooner or later trigger adapter recovery due to an FSF request +timeout (trace tag "fsrth_1"). +FSF requests with FSF request timeout are: +typically FSF_QTCB_ABORT_FCP_CMND; but theoretically also +FSF_QTCB_EXCHANGE_CONFIG_DATA or FSF_QTCB_EXCHANGE_PORT_DATA via sysfs, +FSF_QTCB_OPEN_PORT_WITH_DID or FSF_QTCB_CLOSE_PORT for WKA ports, +FSF_QTCB_FCP_CMND for task management function (LUN / target reset). +One or more pending requests can meanwhile have FSF_PROT_GOOD and FSF_GOOD +because the channel filled in the response via DMA into the request's QTCB. + +In a theoretical case, inject code can create an erroneous FSF request +on purpose. If data router is enabled, it uses deferred error reporting. +A READ SCSI command can succeed with FSF_PROT_GOOD, FSF_GOOD, and +SAM_STAT_GOOD. But on writing the read data to host memory via DMA, +it can still fail, e.g. if an intentionally wrong scatter list does not +provide enough space. Rather than getting an unsuccessful response, +we get a QDIO activate check which in turn triggers adapter recovery. +One or more pending requests can meanwhile have FSF_PROT_GOOD and FSF_GOOD +because the channel filled in the response via DMA into the request's QTCB.
+Example trace records formatted with zfcpdbf from the s390-tools package: + +Timestamp : ... +Area : HBA +Subarea : 00 +Level : 6 > default level => 3 <= default level +Exception : - +CPU ID : .. +Caller : ... +Record ID : 1 +Tag : fs_norm => fs_rerr +Request ID : 0x +Request status : 0x00001010 ZFCP_STATUS_FSFREQ_DISMISSED + | ZFCP_STATUS_FSFREQ_CLEANUP +FSF cmnd : 0x00000001 +FSF sequence no: 0x... +FSF issued : ... +FSF stat : 0x00000000 FSF_GOOD +FSF stat qual : 00000000 00000000 00000000 00000000 +Prot stat : 0x00000001 FSF_PROT_GOOD +Prot stat qual : ........ ........ 00000000 00000000 +Port handle : 0x... +LUN handle : 0x... +| +Timestamp : ... +Area : SCSI +Subarea : 00 +Level : 3 +Exception : - +CPU ID : .. +Caller : ... +Record ID : 1 +Tag : rsl_err +Request ID : 0x +SCSI ID : 0x... +SCSI LUN : 0x... +SCSI result : 0x000e0000 DID_TRANSPORT_DISRUPTED +SCSI retries : 0x00 +SCSI allowed : 0x05 +SCSI scribble : 0x +SCSI opcode : 28... Read(10) +FCP rsp inf cod: 0x00 +FCP rsp IU : 00000000 00000000 00000000 00000000 + ^^ SAM_STAT_GOOD + 00000000 00000000 + +Only with luck in both above cases, we could see a follow-on trace record +of an unsuccessful event following a successful but late FSF response with +FSF_PROT_GOOD and FSF_GOOD. Typically this was the case for I/O requests +resulting in a SCSI trace record "rsl_err" with DID_TRANSPORT_DISRUPTED +[On ZFCP_STATUS_FSFREQ_DISMISSED, zfcp_fsf_protstatus_eval() sets +ZFCP_STATUS_FSFREQ_ERROR seen by the request handler functions as failure]. +However, the reason for this follow-on trace was invisible because the +corresponding HBA trace record was missing at the default trace level +(by default hidden records with tags "fs_norm", "fs_qtcb", or "fs_open"). + +On adapter recovery, after we had shut down the QDIO queues, we perform +unsuccessful pseudo completions with flag ZFCP_STATUS_FSFREQ_DISMISSED +for each pending FSF request in zfcp_fsf_req_dismiss_all().
+In order to find the root cause, we need to see all pseudo responses even +if the channel presented them successfully with FSF_PROT_GOOD and FSF_GOOD. + +Therefore, check zfcp_fsf_req.status for ZFCP_STATUS_FSFREQ_DISMISSED +or ZFCP_STATUS_FSFREQ_ERROR and trace with a new tag "fs_rerr". + +It does not matter that there are numerous places which set +ZFCP_STATUS_FSFREQ_ERROR after the location where we trace an FSF response +early. These cases are based on protocol status != FSF_PROT_GOOD or +== FSF_PROT_FSF_STATUS_PRESENTED and are thus already traced by default +as trace tag "fs_perr" or "fs_ferr" respectively. + +NB: The trace record with tag "fssrh_1" for status read buffers on dismiss +all remains. zfcp_fsf_req_complete() handles this and returns early. +All other FSF request types are handled separately and as described above. + +Signed-off-by: Steffen Maier +Fixes: 8a36e4532ea1 ("[SCSI] zfcp: enhancement of zfcp debug features") +Fixes: 2e261af84cdb ("[SCSI] zfcp: Only collect FSF/HBA debug data for matching trace levels") +Reviewed-by: Benjamin Block +Signed-off-by: Benjamin Block +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/s390/scsi/zfcp_dbf.h | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/s390/scsi/zfcp_dbf.h ++++ b/drivers/s390/scsi/zfcp_dbf.h +@@ -323,7 +323,11 @@ void zfcp_dbf_hba_fsf_response(struct zf + { + struct fsf_qtcb *qtcb = req->qtcb; + +- if ((qtcb->prefix.prot_status != FSF_PROT_GOOD) && ++ if (unlikely(req->status & (ZFCP_STATUS_FSFREQ_DISMISSED | ++ ZFCP_STATUS_FSFREQ_ERROR))) { ++ zfcp_dbf_hba_fsf_resp("fs_rerr", 3, req); ++ ++ } else if ((qtcb->prefix.prot_status != FSF_PROT_GOOD) && + (qtcb->prefix.prot_status != FSF_PROT_FSF_STATUS_PRESENTED)) { + zfcp_dbf_hba_fsf_resp("fs_perr", 1, req); + diff --git a/queue-4.13/scsi-zfcp-trace-high-part-of-new-64-bit-scsi-lun.patch b/queue-4.13/scsi-zfcp-trace-high-part-of-new-64-bit-scsi-lun.patch new file mode 100644 index 00000000000..7bcfb072602 --- /dev/null +++ b/queue-4.13/scsi-zfcp-trace-high-part-of-new-64-bit-scsi-lun.patch @@ -0,0 +1,150 @@ +From 5d4a3d0a2ff23799b956e5962b886287614e7fad Mon Sep 17 00:00:00 2001 +From: Steffen Maier +Date: Fri, 28 Jul 2017 12:30:58 +0200 +Subject: scsi: zfcp: trace high part of "new" 64 bit SCSI LUN + +From: Steffen Maier + +commit 5d4a3d0a2ff23799b956e5962b886287614e7fad upstream. + +Complements debugging aspects of the otherwise functionally complete +v3.17 commit 9cb78c16f5da ("scsi: use 64-bit LUNs"). + +While I don't have access to a target exporting 3 or 4 level LUNs, +I did test it by explicitly attaching a non-existent fake 4 level LUN +by means of zfcp sysfs attribute "unit_add". +In order to see corresponding trace records of otherwise successful +events, we had to increase the trace level of area SCSI and HBA to 6. 
+ +$ echo 6 > /sys/kernel/debug/s390dbf/zfcp_0.0.1880_scsi/level +$ echo 6 > /sys/kernel/debug/s390dbf/zfcp_0.0.1880_hba/level + +$ echo 0x4011402240334044 > \ + /sys/bus/ccw/drivers/zfcp/0.0.1880/0x50050763031bd327/unit_add + +Example output formatted by an updated zfcpdbf from the s390-tools +package interspersed with kernel messages at scsi_logging_level=4605: + +Timestamp : ... +Area : REC +Subarea : 00 +Level : 1 +Exception : - +CPU ID : .. +Caller : 0x... +Record ID : 1 +Tag : scsla_1 +LUN : 0x4011402240334044 +WWPN : 0x50050763031bd327 +D_ID : 0x00...... +Adapter status : 0x5400050b +Port status : 0x54000001 +LUN status : 0x41000000 +Ready count : 0x00000001 +Running count : 0x00000000 +ERP want : 0x01 +ERP need : 0x01 + +scsi 2:0:0:4630896905707208721: scsi scan: INQUIRY pass 1 length 36 +scsi 2:0:0:4630896905707208721: scsi scan: INQUIRY successful with code 0x0 + +Timestamp : ... +Area : HBA +Subarea : 00 +Level : 6 +Exception : - +CPU ID : .. +Caller : 0x... +Record ID : 1 +Tag : fs_norm +Request ID : 0x +Request status : 0x00000010 +FSF cmnd : 0x00000001 +FSF sequence no: 0x... +FSF issued : ... +FSF stat : 0x00000000 +FSF stat qual : 00000000 00000000 00000000 00000000 +Prot stat : 0x00000001 +Prot stat qual : ........ ........ 00000000 00000000 +Port handle : 0x... +LUN handle : 0x... +| +Timestamp : ... +Area : SCSI +Subarea : 00 +Level : 6 +Exception : - +CPU ID : .. +Caller : 0x... 
+Record ID : 1 +Tag : rsl_nor +Request ID : 0x +SCSI ID : 0x00000000 +SCSI LUN : 0x40224011 +SCSI LUN high : 0x40444033 <======================= +SCSI result : 0x00000000 +SCSI retries : 0x00 +SCSI allowed : 0x03 +SCSI scribble : 0x +SCSI opcode : 12000000 a4000000 00000000 00000000 +FCP rsp inf cod: 0x00 +FCP rsp IU : 00000000 00000000 00000000 00000000 + 00000000 00000000 + +scsi 2:0:0:4630896905707208721: scsi scan: INQUIRY pass 2 length 164 +scsi 2:0:0:4630896905707208721: scsi scan: INQUIRY successful with code 0x0 +scsi 2:0:0:4630896905707208721: scsi scan: peripheral device type of 31, \ +no device added + +Signed-off-by: Steffen Maier +Fixes: 9cb78c16f5da ("scsi: use 64-bit LUNs") +Reviewed-by: Benjamin Block +Reviewed-by: Jens Remus +Signed-off-by: Benjamin Block +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/s390/scsi/zfcp_dbf.c | 2 +- + drivers/s390/scsi/zfcp_dbf.h | 4 +++- + 2 files changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/s390/scsi/zfcp_dbf.c ++++ b/drivers/s390/scsi/zfcp_dbf.c +@@ -563,8 +563,8 @@ void zfcp_dbf_scsi(char *tag, int level, + rec->scsi_retries = sc->retries; + rec->scsi_allowed = sc->allowed; + rec->scsi_id = sc->device->id; +- /* struct zfcp_dbf_scsi needs to be updated to handle 64bit LUNs */ + rec->scsi_lun = (u32)sc->device->lun; ++ rec->scsi_lun_64_hi = (u32)(sc->device->lun >> 32); + rec->host_scribble = (unsigned long)sc->host_scribble; + + memcpy(rec->scsi_opcode, sc->cmnd, +--- a/drivers/s390/scsi/zfcp_dbf.h ++++ b/drivers/s390/scsi/zfcp_dbf.h +@@ -204,7 +204,7 @@ enum zfcp_dbf_scsi_id { + * @id: unique number of recovery record type + * @tag: identifier string specifying the location of initiation + * @scsi_id: scsi device id +- * @scsi_lun: scsi device logical unit number ++ * @scsi_lun: scsi device logical unit number, low part of 64 bit, old 32 bit + * @scsi_result: scsi result + * @scsi_retries: current retry number of scsi request + * @scsi_allowed: allowed retries 
+@@ -214,6 +214,7 @@ enum zfcp_dbf_scsi_id { + * @host_scribble: LLD specific data attached to SCSI request + * @pl_len: length of paload stored as zfcp_dbf_pay + * @fsf_rsp: response for fsf request ++ * @scsi_lun_64_hi: scsi device logical unit number, high part of 64 bit + */ + struct zfcp_dbf_scsi { + u8 id; +@@ -230,6 +231,7 @@ struct zfcp_dbf_scsi { + u64 host_scribble; + u16 pl_len; + struct fcp_resp_with_ext fcp_rsp; ++ u32 scsi_lun_64_hi; + } __packed; + + /** diff --git a/queue-4.13/series b/queue-4.13/series index 4026f53ccd4..ba68d54e40d 100644 --- a/queue-4.13/series +++ b/queue-4.13/series @@ -45,4 +45,21 @@ block-relax-a-check-in-blk_start_queue.patch block-directly-insert-blk-mq-request-from-blk_insert_cloned_request.patch md-bitmap-copy-correct-data-for-bitmap-super.patch md-bitmap-disable-bitmap_resize-for-file-backed-bitmaps.patch -iwlwifi-add-workaround-to-disable-wide-channels-in-5ghz.patch +skd-avoid-that-module-unloading-triggers-a-use-after-free.patch +skd-submit-requests-to-firmware-before-triggering-the-doorbell.patch +scsi-zfcp-fix-queuecommand-for-scsi_eh-commands-when-dix-enabled.patch +scsi-zfcp-add-handling-for-fcp_resid_over-to-the-fcp-ingress-path.patch +scsi-zfcp-fix-capping-of-unsuccessful-gpn_ft-san-response-trace-records.patch +scsi-zfcp-fix-passing-fsf_req-to-scsi-trace-on-tmf-to-correlate-with-hba.patch +scsi-zfcp-fix-missing-trace-records-for-early-returns-in-tmf-eh-handlers.patch +scsi-zfcp-fix-payload-with-full-fcp_rsp-iu-in-scsi-trace-records.patch +scsi-zfcp-trace-hba-fsf-response-by-default-on-dismiss-or-timedout-late-response.patch +scsi-zfcp-trace-high-part-of-new-64-bit-scsi-lun.patch +scsi-qedi-off-by-one-in-qedi_get_cmd_from_tid.patch +scsi-aacraid-fix-command-send-race-condition.patch +scsi-megaraid_sas-mismatch-of-allocated-mfi-frame-size-and-length-exposed-in-mfi-mpt-pass-through-command.patch +scsi-megaraid_sas-set-minimum-value-of-resetwaittime-to-be-1-secs.patch 
+scsi-megaraid_sas-check-valid-aen-class-range-to-avoid-kernel-panic.patch +scsi-megaraid_sas-return-pended-ioctls-with-cmd_status-mfi_stat_wrong_state-in-case-adapter-is-dead.patch +scsi-storvsc-fix-memory-leak-on-ring-buffer-busy.patch +scsi-sg-factor-out-sg_fill_request_table.patch diff --git a/queue-4.13/skd-avoid-that-module-unloading-triggers-a-use-after-free.patch b/queue-4.13/skd-avoid-that-module-unloading-triggers-a-use-after-free.patch new file mode 100644 index 00000000000..1b248857634 --- /dev/null +++ b/queue-4.13/skd-avoid-that-module-unloading-triggers-a-use-after-free.patch @@ -0,0 +1,78 @@ +From 7277cc67b3916eed47558c64f9c9c0de00a35cda Mon Sep 17 00:00:00 2001 +From: Bart Van Assche +Date: Thu, 17 Aug 2017 13:12:45 -0700 +Subject: skd: Avoid that module unloading triggers a use-after-free + +From: Bart Van Assche + +commit 7277cc67b3916eed47558c64f9c9c0de00a35cda upstream. + +Since put_disk() triggers a disk_release() call and since that +last function calls blk_put_queue() if disk->queue != NULL, clear +the disk->queue pointer before calling put_disk(). This avoids +that unloading the skd kernel module triggers the following +use-after-free: + +WARNING: CPU: 8 PID: 297 at lib/refcount.c:128 refcount_sub_and_test+0x70/0x80 +refcount_t: underflow; use-after-free. 
+CPU: 8 PID: 297 Comm: kworker/8:1 Not tainted 4.11.10-300.fc26.x86_64 #1 +Workqueue: events work_for_cpu_fn +Call Trace: + dump_stack+0x63/0x84 + __warn+0xcb/0xf0 + warn_slowpath_fmt+0x5a/0x80 + refcount_sub_and_test+0x70/0x80 + refcount_dec_and_test+0x11/0x20 + kobject_put+0x1f/0x50 + blk_put_queue+0x15/0x20 + disk_release+0xae/0xf0 + device_release+0x32/0x90 + kobject_release+0x67/0x170 + kobject_put+0x2b/0x50 + put_disk+0x17/0x20 + skd_destruct+0x5c/0x890 [skd] + skd_pci_probe+0x124d/0x13a0 [skd] + local_pci_probe+0x42/0xa0 + work_for_cpu_fn+0x14/0x20 + process_one_work+0x19e/0x470 + worker_thread+0x1dc/0x4a0 + kthread+0x125/0x140 + ret_from_fork+0x25/0x30 + +Signed-off-by: Bart Van Assche +Cc: Christoph Hellwig +Cc: Hannes Reinecke +Cc: Johannes Thumshirn +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/block/skd_main.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +--- a/drivers/block/skd_main.c ++++ b/drivers/block/skd_main.c +@@ -4539,15 +4539,16 @@ static void skd_free_disk(struct skd_dev + { + struct gendisk *disk = skdev->disk; + +- if (disk != NULL) { +- struct request_queue *q = disk->queue; ++ if (disk && (disk->flags & GENHD_FL_UP)) ++ del_gendisk(disk); + +- if (disk->flags & GENHD_FL_UP) +- del_gendisk(disk); +- if (q) +- blk_cleanup_queue(q); +- put_disk(disk); ++ if (skdev->queue) { ++ blk_cleanup_queue(skdev->queue); ++ skdev->queue = NULL; ++ disk->queue = NULL; + } ++ ++ put_disk(disk); + skdev->disk = NULL; + } + diff --git a/queue-4.13/skd-submit-requests-to-firmware-before-triggering-the-doorbell.patch b/queue-4.13/skd-submit-requests-to-firmware-before-triggering-the-doorbell.patch new file mode 100644 index 00000000000..c7cb4df7e18 --- /dev/null +++ b/queue-4.13/skd-submit-requests-to-firmware-before-triggering-the-doorbell.patch @@ -0,0 +1,49 @@ +From 5fbd545cd3fd311ea1d6e8be4cedddd0ee5684c7 Mon Sep 17 00:00:00 2001 +From: Bart Van Assche +Date: Thu, 17 Aug 2017 13:12:46 -0700 
+Subject: skd: Submit requests to firmware before triggering the doorbell + +From: Bart Van Assche + +commit 5fbd545cd3fd311ea1d6e8be4cedddd0ee5684c7 upstream. + +Ensure that the members of struct skd_msg_buf have been transferred +to the PCIe adapter before the doorbell is triggered. This patch +avoids that I/O fails sporadically and that the following error +message is reported: + +(skd0:STM000196603:[0000:00:09.0]): Completion mismatch comp_id=0x0000 skreq=0x0400 new=0x0000 + +Signed-off-by: Bart Van Assche +Cc: Christoph Hellwig +Cc: Hannes Reinecke +Cc: Johannes Thumshirn +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/block/skd_main.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/block/skd_main.c ++++ b/drivers/block/skd_main.c +@@ -2160,6 +2160,9 @@ static void skd_send_fitmsg(struct skd_d + */ + qcmd |= FIT_QCMD_MSGSIZE_64; + ++ /* Make sure skd_msg_buf is written before the doorbell is triggered. */ ++ smp_wmb(); ++ + SKD_WRITEQ(skdev, qcmd, FIT_Q_COMMAND); + } + +@@ -2202,6 +2205,9 @@ static void skd_send_special_fitmsg(stru + qcmd = skspcl->mb_dma_address; + qcmd |= FIT_QCMD_QID_NORMAL + FIT_QCMD_MSGSIZE_128; + ++ /* Make sure skd_msg_buf is written before the doorbell is triggered. */ ++ smp_wmb(); ++ + SKD_WRITEQ(skdev, qcmd, FIT_Q_COMMAND); + } +