From 1bbc64e3315c31db578d7af1a729e4932ef76320 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 14 Aug 2022 12:17:22 +0200 Subject: [PATCH] 5.15-stable patches added patches: scsi-qla2xxx-fix-crash-due-to-stale-srb-access-around-i-o-timeouts.patch scsi-qla2xxx-fix-discovery-issues-in-fc-al-topology.patch scsi-qla2xxx-fix-erroneous-mailbox-timeout-after-pci-error-injection.patch scsi-qla2xxx-fix-excessive-i-o-error-messages-by-default.patch scsi-qla2xxx-fix-imbalance-vha-vref_count.patch scsi-qla2xxx-fix-losing-fcp-2-targets-during-port-perturbation-tests.patch scsi-qla2xxx-fix-losing-fcp-2-targets-on-long-port-disable-with-i-os.patch scsi-qla2xxx-fix-losing-target-when-it-reappears-during-delete.patch scsi-qla2xxx-turn-off-multi-queue-for-8g-adapters.patch scsi-qla2xxx-wind-down-adapter-after-pcie-error.patch scsi-ufs-core-correct-ufshcd_shutdown-flow.patch scsi-zfcp-fix-missing-auto-port-scan-and-thus-missing-target-ports.patch --- ...stale-srb-access-around-i-o-timeouts.patch | 120 +++++++++ ...x-discovery-issues-in-fc-al-topology.patch | 107 ++++++++ ...ox-timeout-after-pci-error-injection.patch | 61 +++++ ...essive-i-o-error-messages-by-default.patch | 43 ++++ ...qla2xxx-fix-imbalance-vha-vref_count.patch | 56 +++++ ...rgets-during-port-perturbation-tests.patch | 35 +++ ...rgets-on-long-port-disable-with-i-os.patch | 66 +++++ ...rget-when-it-reappears-during-delete.patch | 79 ++++++ ...turn-off-multi-queue-for-8g-adapters.patch | 61 +++++ ...x-wind-down-adapter-after-pcie-error.patch | 199 +++++++++++++++ ...fs-core-correct-ufshcd_shutdown-flow.patch | 47 ++++ ...t-scan-and-thus-missing-target-ports.patch | 232 ++++++++++++++++++ queue-5.15/series | 12 + 13 files changed, 1118 insertions(+) create mode 100644 queue-5.15/scsi-qla2xxx-fix-crash-due-to-stale-srb-access-around-i-o-timeouts.patch create mode 100644 queue-5.15/scsi-qla2xxx-fix-discovery-issues-in-fc-al-topology.patch create mode 100644 
queue-5.15/scsi-qla2xxx-fix-erroneous-mailbox-timeout-after-pci-error-injection.patch create mode 100644 queue-5.15/scsi-qla2xxx-fix-excessive-i-o-error-messages-by-default.patch create mode 100644 queue-5.15/scsi-qla2xxx-fix-imbalance-vha-vref_count.patch create mode 100644 queue-5.15/scsi-qla2xxx-fix-losing-fcp-2-targets-during-port-perturbation-tests.patch create mode 100644 queue-5.15/scsi-qla2xxx-fix-losing-fcp-2-targets-on-long-port-disable-with-i-os.patch create mode 100644 queue-5.15/scsi-qla2xxx-fix-losing-target-when-it-reappears-during-delete.patch create mode 100644 queue-5.15/scsi-qla2xxx-turn-off-multi-queue-for-8g-adapters.patch create mode 100644 queue-5.15/scsi-qla2xxx-wind-down-adapter-after-pcie-error.patch create mode 100644 queue-5.15/scsi-ufs-core-correct-ufshcd_shutdown-flow.patch create mode 100644 queue-5.15/scsi-zfcp-fix-missing-auto-port-scan-and-thus-missing-target-ports.patch diff --git a/queue-5.15/scsi-qla2xxx-fix-crash-due-to-stale-srb-access-around-i-o-timeouts.patch b/queue-5.15/scsi-qla2xxx-fix-crash-due-to-stale-srb-access-around-i-o-timeouts.patch new file mode 100644 index 00000000000..6782be7e599 --- /dev/null +++ b/queue-5.15/scsi-qla2xxx-fix-crash-due-to-stale-srb-access-around-i-o-timeouts.patch @@ -0,0 +1,120 @@ +From c39587bc0abaf16593f7abcdf8aeec3c038c7d52 Mon Sep 17 00:00:00 2001 +From: Arun Easi +Date: Wed, 15 Jun 2022 22:35:02 -0700 +Subject: scsi: qla2xxx: Fix crash due to stale SRB access around I/O timeouts + +From: Arun Easi + +commit c39587bc0abaf16593f7abcdf8aeec3c038c7d52 upstream. + +Ensure SRB is returned during I/O timeout error escalation. If that is not +possible fail the escalation path. + +Following crash stack was seen: + +BUG: unable to handle kernel paging request at 0000002f56aa90f8 +IP: qla_chk_edif_rx_sa_delete_pending+0x14/0x30 [qla2xxx] +Call Trace: + ? qla2x00_status_entry+0x19f/0x1c50 [qla2xxx] + ? qla2x00_start_sp+0x116/0x1170 [qla2xxx] + ? dma_pool_alloc+0x1d6/0x210 + ? 
mempool_alloc+0x54/0x130 + ? qla24xx_process_response_queue+0x548/0x12b0 [qla2xxx] + ? qla_do_work+0x2d/0x40 [qla2xxx] + ? process_one_work+0x14c/0x390 + +Link: https://lore.kernel.org/r/20220616053508.27186-6-njavali@marvell.com +Fixes: d74595278f4a ("scsi: qla2xxx: Add multiple queue pair functionality.") +Cc: stable@vger.kernel.org +Signed-off-by: Arun Easi +Signed-off-by: Nilesh Javali +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_os.c | 43 ++++++++++++++++++++++++++++++------------ + 1 file changed, 31 insertions(+), 12 deletions(-) + +--- a/drivers/scsi/qla2xxx/qla_os.c ++++ b/drivers/scsi/qla2xxx/qla_os.c +@@ -1333,21 +1333,20 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) + /* + * Returns: QLA_SUCCESS or QLA_FUNCTION_FAILED. + */ +-int +-qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *vha, unsigned int t, +- uint64_t l, enum nexus_wait_type type) ++static int ++__qla2x00_eh_wait_for_pending_commands(struct qla_qpair *qpair, unsigned int t, ++ uint64_t l, enum nexus_wait_type type) + { + int cnt, match, status; + unsigned long flags; +- struct qla_hw_data *ha = vha->hw; +- struct req_que *req; ++ scsi_qla_host_t *vha = qpair->vha; ++ struct req_que *req = qpair->req; + srb_t *sp; + struct scsi_cmnd *cmd; + + status = QLA_SUCCESS; + +- spin_lock_irqsave(&ha->hardware_lock, flags); +- req = vha->req; ++ spin_lock_irqsave(qpair->qp_lock_ptr, flags); + for (cnt = 1; status == QLA_SUCCESS && + cnt < req->num_outstanding_cmds; cnt++) { + sp = req->outstanding_cmds[cnt]; +@@ -1374,12 +1373,32 @@ qla2x00_eh_wait_for_pending_commands(scs + if (!match) + continue; + +- spin_unlock_irqrestore(&ha->hardware_lock, flags); ++ spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); + status = qla2x00_eh_wait_on_command(cmd); +- spin_lock_irqsave(&ha->hardware_lock, flags); ++ spin_lock_irqsave(qpair->qp_lock_ptr, flags); + } +- spin_unlock_irqrestore(&ha->hardware_lock, flags); ++ 
spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); ++ ++ return status; ++} ++ ++int ++qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *vha, unsigned int t, ++ uint64_t l, enum nexus_wait_type type) ++{ ++ struct qla_qpair *qpair; ++ struct qla_hw_data *ha = vha->hw; ++ int i, status = QLA_SUCCESS; + ++ status = __qla2x00_eh_wait_for_pending_commands(ha->base_qpair, t, l, ++ type); ++ for (i = 0; status == QLA_SUCCESS && i < ha->max_qpairs; i++) { ++ qpair = ha->queue_pair_map[i]; ++ if (!qpair) ++ continue; ++ status = __qla2x00_eh_wait_for_pending_commands(qpair, t, l, ++ type); ++ } + return status; + } + +@@ -1416,7 +1435,7 @@ qla2xxx_eh_device_reset(struct scsi_cmnd + return err; + + if (fcport->deleted) +- return SUCCESS; ++ return FAILED; + + ql_log(ql_log_info, vha, 0x8009, + "DEVICE RESET ISSUED nexus=%ld:%d:%llu cmd=%p.\n", vha->host_no, +@@ -1484,7 +1503,7 @@ qla2xxx_eh_target_reset(struct scsi_cmnd + return err; + + if (fcport->deleted) +- return SUCCESS; ++ return FAILED; + + ql_log(ql_log_info, vha, 0x8009, + "TARGET RESET ISSUED nexus=%ld:%d cmd=%p.\n", vha->host_no, diff --git a/queue-5.15/scsi-qla2xxx-fix-discovery-issues-in-fc-al-topology.patch b/queue-5.15/scsi-qla2xxx-fix-discovery-issues-in-fc-al-topology.patch new file mode 100644 index 00000000000..368c95bf9af --- /dev/null +++ b/queue-5.15/scsi-qla2xxx-fix-discovery-issues-in-fc-al-topology.patch @@ -0,0 +1,107 @@ +From 47ccb113cead905bdc236571bf8ac6fed90321b3 Mon Sep 17 00:00:00 2001 +From: Arun Easi +Date: Tue, 12 Jul 2022 22:20:42 -0700 +Subject: scsi: qla2xxx: Fix discovery issues in FC-AL topology + +From: Arun Easi + +commit 47ccb113cead905bdc236571bf8ac6fed90321b3 upstream. + +A direct attach tape device, when gets swapped with another, was not +discovered. Fix this by looking at loop map and reinitialize link if there +are devices present. 
+ +Link: https://lore.kernel.org/linux-scsi/baef87c3-5dad-3b47-44c1-6914bfc90108@cybernetics.com/ +Link: https://lore.kernel.org/r/20220713052045.10683-8-njavali@marvell.com +Cc: stable@vger.kernel.org +Reported-by: Tony Battersby +Tested-by: Tony Battersby +Reviewed-by: Himanshu Madhani +Signed-off-by: Arun Easi +Signed-off-by: Nilesh Javali +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_gbl.h | 3 ++- + drivers/scsi/qla2xxx/qla_init.c | 29 +++++++++++++++++++++++++++++ + drivers/scsi/qla2xxx/qla_mbx.c | 5 ++++- + 3 files changed, 35 insertions(+), 2 deletions(-) + +--- a/drivers/scsi/qla2xxx/qla_gbl.h ++++ b/drivers/scsi/qla2xxx/qla_gbl.h +@@ -433,7 +433,8 @@ extern int + qla2x00_get_resource_cnts(scsi_qla_host_t *); + + extern int +-qla2x00_get_fcal_position_map(scsi_qla_host_t *ha, char *pos_map); ++qla2x00_get_fcal_position_map(scsi_qla_host_t *ha, char *pos_map, ++ u8 *num_entries); + + extern int + qla2x00_get_link_status(scsi_qla_host_t *, uint16_t, struct link_statistics *, +--- a/drivers/scsi/qla2xxx/qla_init.c ++++ b/drivers/scsi/qla2xxx/qla_init.c +@@ -5510,6 +5510,22 @@ static int qla2x00_configure_n2n_loop(sc + return QLA_FUNCTION_FAILED; + } + ++static void ++qla_reinitialize_link(scsi_qla_host_t *vha) ++{ ++ int rval; ++ ++ atomic_set(&vha->loop_state, LOOP_DOWN); ++ atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME); ++ rval = qla2x00_full_login_lip(vha); ++ if (rval == QLA_SUCCESS) { ++ ql_dbg(ql_dbg_disc, vha, 0xd050, "Link reinitialized\n"); ++ } else { ++ ql_dbg(ql_dbg_disc, vha, 0xd051, ++ "Link reinitialization failed (%d)\n", rval); ++ } ++} ++ + /* + * qla2x00_configure_local_loop + * Updates Fibre Channel Device Database with local loop devices. 
+@@ -5561,6 +5577,19 @@ qla2x00_configure_local_loop(scsi_qla_ho + spin_unlock_irqrestore(&vha->work_lock, flags); + + if (vha->scan.scan_retry < MAX_SCAN_RETRIES) { ++ u8 loop_map_entries = 0; ++ int rc; ++ ++ rc = qla2x00_get_fcal_position_map(vha, NULL, ++ &loop_map_entries); ++ if (rc == QLA_SUCCESS && loop_map_entries > 1) { ++ /* ++ * There are devices that are still not logged ++ * in. Reinitialize to give them a chance. ++ */ ++ qla_reinitialize_link(vha); ++ return QLA_FUNCTION_FAILED; ++ } + set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); + set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); + } +--- a/drivers/scsi/qla2xxx/qla_mbx.c ++++ b/drivers/scsi/qla2xxx/qla_mbx.c +@@ -3062,7 +3062,8 @@ qla2x00_get_resource_cnts(scsi_qla_host_ + * Kernel context. + */ + int +-qla2x00_get_fcal_position_map(scsi_qla_host_t *vha, char *pos_map) ++qla2x00_get_fcal_position_map(scsi_qla_host_t *vha, char *pos_map, ++ u8 *num_entries) + { + int rval; + mbx_cmd_t mc; +@@ -3102,6 +3103,8 @@ qla2x00_get_fcal_position_map(scsi_qla_h + + if (pos_map) + memcpy(pos_map, pmap, FCAL_MAP_SIZE); ++ if (num_entries) ++ *num_entries = pmap[0]; + } + dma_pool_free(ha->s_dma_pool, pmap, pmap_dma); + diff --git a/queue-5.15/scsi-qla2xxx-fix-erroneous-mailbox-timeout-after-pci-error-injection.patch b/queue-5.15/scsi-qla2xxx-fix-erroneous-mailbox-timeout-after-pci-error-injection.patch new file mode 100644 index 00000000000..efaab290787 --- /dev/null +++ b/queue-5.15/scsi-qla2xxx-fix-erroneous-mailbox-timeout-after-pci-error-injection.patch @@ -0,0 +1,61 @@ +From f260694e6463b63ae550aad25ddefe94cb1904da Mon Sep 17 00:00:00 2001 +From: Quinn Tran +Date: Wed, 15 Jun 2022 22:35:07 -0700 +Subject: scsi: qla2xxx: Fix erroneous mailbox timeout after PCI error injection + +From: Quinn Tran + +commit f260694e6463b63ae550aad25ddefe94cb1904da upstream. 
+ +Clear wait for mailbox interrupt flag to prevent stale mailbox: + +Feb 22 05:22:56 ltcden4-lp7 kernel: qla2xxx [0135:90:00.1]-500a:4: LOOP UP detected (16 Gbps). +Feb 22 05:22:59 ltcden4-lp7 kernel: qla2xxx [0135:90:00.1]-d04c:4: MBX Command timeout for cmd 69, ... + +To fix the issue, driver needs to clear the MBX_INTR_WAIT flag on purging +the mailbox. When the stale mailbox completion does arrive, it will be +dropped. + +Link: https://lore.kernel.org/r/20220616053508.27186-11-njavali@marvell.com +Fixes: b6faaaf796d7 ("scsi: qla2xxx: Serialize mailbox request") +Cc: Naresh Bannoth +Cc: Kyle Mahlkuch +Cc: stable@vger.kernel.org +Reported-by: Naresh Bannoth +Tested-by: Naresh Bannoth +Signed-off-by: Quinn Tran +Signed-off-by: Nilesh Javali +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_mbx.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/drivers/scsi/qla2xxx/qla_mbx.c ++++ b/drivers/scsi/qla2xxx/qla_mbx.c +@@ -276,6 +276,12 @@ qla2x00_mailbox_command(scsi_qla_host_t + atomic_inc(&ha->num_pend_mbx_stage3); + if (!wait_for_completion_timeout(&ha->mbx_intr_comp, + mcp->tov * HZ)) { ++ ql_dbg(ql_dbg_mbx, vha, 0x117a, ++ "cmd=%x Timeout.\n", command); ++ spin_lock_irqsave(&ha->hardware_lock, flags); ++ clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags); ++ spin_unlock_irqrestore(&ha->hardware_lock, flags); ++ + if (chip_reset != ha->chip_reset) { + eeh_delay = ha->flags.eeh_busy ? 1 : 0; + +@@ -288,12 +294,6 @@ qla2x00_mailbox_command(scsi_qla_host_t + rval = QLA_ABORTED; + goto premature_exit; + } +- ql_dbg(ql_dbg_mbx, vha, 0x117a, +- "cmd=%x Timeout.\n", command); +- spin_lock_irqsave(&ha->hardware_lock, flags); +- clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags); +- spin_unlock_irqrestore(&ha->hardware_lock, flags); +- + } else if (ha->flags.purge_mbox || + chip_reset != ha->chip_reset) { + eeh_delay = ha->flags.eeh_busy ? 
1 : 0; diff --git a/queue-5.15/scsi-qla2xxx-fix-excessive-i-o-error-messages-by-default.patch b/queue-5.15/scsi-qla2xxx-fix-excessive-i-o-error-messages-by-default.patch new file mode 100644 index 00000000000..16be98d7327 --- /dev/null +++ b/queue-5.15/scsi-qla2xxx-fix-excessive-i-o-error-messages-by-default.patch @@ -0,0 +1,43 @@ +From bff4873c709085e09d0ffae0c25b8e65256e3205 Mon Sep 17 00:00:00 2001 +From: Arun Easi +Date: Wed, 15 Jun 2022 22:34:58 -0700 +Subject: scsi: qla2xxx: Fix excessive I/O error messages by default + +From: Arun Easi + +commit bff4873c709085e09d0ffae0c25b8e65256e3205 upstream. + +Disable printing I/O error messages by default. The messages will be +printed only when logging was enabled. + +Link: https://lore.kernel.org/r/20220616053508.27186-2-njavali@marvell.com +Fixes: 8e2d81c6b5be ("scsi: qla2xxx: Fix excessive messages during device logout") +Cc: stable@vger.kernel.org +Signed-off-by: Arun Easi +Signed-off-by: Nilesh Javali +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_isr.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/scsi/qla2xxx/qla_isr.c ++++ b/drivers/scsi/qla2xxx/qla_isr.c +@@ -2633,7 +2633,7 @@ static void qla24xx_nvme_iocb_entry(scsi + } + + if (unlikely(logit)) +- ql_log(ql_dbg_io, fcport->vha, 0x5060, ++ ql_dbg(ql_dbg_io, fcport->vha, 0x5060, + "NVME-%s ERR Handling - hdl=%x status(%x) tr_len:%x resid=%x ox_id=%x\n", + sp->name, sp->handle, comp_status, + fd->transferred_length, le32_to_cpu(sts->residual_len), +@@ -3491,7 +3491,7 @@ check_scsi_status: + + out: + if (logit) +- ql_log(ql_dbg_io, fcport->vha, 0x3022, ++ ql_dbg(ql_dbg_io, fcport->vha, 0x3022, + "FCP command status: 0x%x-0x%x (0x%x) nexus=%ld:%d:%llu portid=%02x%02x%02x oxid=0x%x cdb=%10phN len=0x%x rsp_info=0x%x resid=0x%x fw_resid=0x%x sp=%p cp=%p.\n", + comp_status, scsi_status, res, vha->host_no, + cp->device->id, cp->device->lun, fcport->d_id.b.domain, diff --git 
a/queue-5.15/scsi-qla2xxx-fix-imbalance-vha-vref_count.patch b/queue-5.15/scsi-qla2xxx-fix-imbalance-vha-vref_count.patch new file mode 100644 index 00000000000..1e9c73e8cab --- /dev/null +++ b/queue-5.15/scsi-qla2xxx-fix-imbalance-vha-vref_count.patch @@ -0,0 +1,56 @@ +From 63fa7f2644b4b48e1913af33092c044bf48e9321 Mon Sep 17 00:00:00 2001 +From: Quinn Tran +Date: Tue, 12 Jul 2022 22:20:41 -0700 +Subject: scsi: qla2xxx: Fix imbalance vha->vref_count + +From: Quinn Tran + +commit 63fa7f2644b4b48e1913af33092c044bf48e9321 upstream. + +vref_count took an extra decrement in the task management path. Add an +extra ref count to compensate the imbalance. + +Link: https://lore.kernel.org/r/20220713052045.10683-7-njavali@marvell.com +Cc: stable@vger.kernel.org +Reviewed-by: Himanshu Madhani +Signed-off-by: Quinn Tran +Signed-off-by: Nilesh Javali +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_init.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/scsi/qla2xxx/qla_init.c ++++ b/drivers/scsi/qla2xxx/qla_init.c +@@ -161,6 +161,7 @@ int qla24xx_async_abort_cmd(srb_t *cmd_s + struct srb_iocb *abt_iocb; + srb_t *sp; + int rval = QLA_FUNCTION_FAILED; ++ uint8_t bail; + + /* ref: INIT for ABTS command */ + sp = qla2xxx_get_qpair_sp(cmd_sp->vha, cmd_sp->qpair, cmd_sp->fcport, +@@ -168,6 +169,7 @@ int qla24xx_async_abort_cmd(srb_t *cmd_s + if (!sp) + return QLA_MEMORY_ALLOC_FAILED; + ++ QLA_VHA_MARK_BUSY(vha, bail); + abt_iocb = &sp->u.iocb_cmd; + sp->type = SRB_ABT_CMD; + sp->name = "abort"; +@@ -2009,12 +2011,14 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, + struct srb_iocb *tm_iocb; + srb_t *sp; + int rval = QLA_FUNCTION_FAILED; ++ uint8_t bail; + + /* ref: INIT */ + sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); + if (!sp) + goto done; + ++ QLA_VHA_MARK_BUSY(vha, bail); + sp->type = SRB_TM_CMD; + sp->name = "tmf"; + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha), diff --git 
a/queue-5.15/scsi-qla2xxx-fix-losing-fcp-2-targets-during-port-perturbation-tests.patch b/queue-5.15/scsi-qla2xxx-fix-losing-fcp-2-targets-during-port-perturbation-tests.patch new file mode 100644 index 00000000000..f9ec3d7fc22 --- /dev/null +++ b/queue-5.15/scsi-qla2xxx-fix-losing-fcp-2-targets-during-port-perturbation-tests.patch @@ -0,0 +1,35 @@ +From 58d1c124cd79ea686b512043c5bd515590b2ed95 Mon Sep 17 00:00:00 2001 +From: Arun Easi +Date: Wed, 15 Jun 2022 22:35:03 -0700 +Subject: scsi: qla2xxx: Fix losing FCP-2 targets during port perturbation tests + +From: Arun Easi + +commit 58d1c124cd79ea686b512043c5bd515590b2ed95 upstream. + +When a mix of FCP-2 (tape) and non-FCP-2 targets are present, FCP-2 target +state was incorrectly transitioned when both of the targets were gone. Fix +this by ignoring state transition for FCP-2 targets. + +Link: https://lore.kernel.org/r/20220616053508.27186-7-njavali@marvell.com +Fixes: 44c57f205876 ("scsi: qla2xxx: Changes to support FCP2 Target") +Cc: stable@vger.kernel.org +Signed-off-by: Arun Easi +Signed-off-by: Nilesh Javali +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_gs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/scsi/qla2xxx/qla_gs.c ++++ b/drivers/scsi/qla2xxx/qla_gs.c +@@ -3628,7 +3628,7 @@ login_logout: + do_delete) { + if (fcport->loop_id != FC_NO_LOOP_ID) { + if (fcport->flags & FCF_FCP2_DEVICE) +- fcport->logout_on_delete = 0; ++ continue; + + ql_log(ql_log_warn, vha, 0x20f0, + "%s %d %8phC post del sess\n", diff --git a/queue-5.15/scsi-qla2xxx-fix-losing-fcp-2-targets-on-long-port-disable-with-i-os.patch b/queue-5.15/scsi-qla2xxx-fix-losing-fcp-2-targets-on-long-port-disable-with-i-os.patch new file mode 100644 index 00000000000..4f5a04f5a27 --- /dev/null +++ b/queue-5.15/scsi-qla2xxx-fix-losing-fcp-2-targets-on-long-port-disable-with-i-os.patch @@ -0,0 +1,66 @@ +From 2416ccd3815ba1613e10a6da0a24ef21acfe5633 Mon Sep 17 00:00:00 2001 +From: Arun Easi +Date: Wed, 15 Jun 2022 22:35:06 -0700 +Subject: scsi: qla2xxx: Fix losing FCP-2 targets on long port disable with I/Os + +From: Arun Easi + +commit 2416ccd3815ba1613e10a6da0a24ef21acfe5633 upstream. + +FCP-2 devices were not coming back online once they were lost, login +retries exhausted, and then came back up. Fix this by accepting RSCN when +the device is not online. + +Link: https://lore.kernel.org/r/20220616053508.27186-10-njavali@marvell.com +Fixes: 44c57f205876 ("scsi: qla2xxx: Changes to support FCP2 Target") +Cc: stable@vger.kernel.org +Signed-off-by: Arun Easi +Signed-off-by: Nilesh Javali +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_init.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +--- a/drivers/scsi/qla2xxx/qla_init.c ++++ b/drivers/scsi/qla2xxx/qla_init.c +@@ -1836,7 +1836,8 @@ void qla2x00_handle_rscn(scsi_qla_host_t + case RSCN_PORT_ADDR: + fcport = qla2x00_find_fcport_by_nportid(vha, &ea->id, 1); + if (fcport) { +- if (fcport->flags & FCF_FCP2_DEVICE) { ++ if (fcport->flags & FCF_FCP2_DEVICE && ++ atomic_read(&fcport->state) == FCS_ONLINE) { + ql_dbg(ql_dbg_disc, vha, 0x2115, + "Delaying session delete for FCP2 portid=%06x %8phC ", + fcport->d_id.b24, fcport->port_name); +@@ -1868,7 +1869,8 @@ void qla2x00_handle_rscn(scsi_qla_host_t + break; + case RSCN_AREA_ADDR: + list_for_each_entry(fcport, &vha->vp_fcports, list) { +- if (fcport->flags & FCF_FCP2_DEVICE) ++ if (fcport->flags & FCF_FCP2_DEVICE && ++ atomic_read(&fcport->state) == FCS_ONLINE) + continue; + + if ((ea->id.b24 & 0xffff00) == (fcport->d_id.b24 & 0xffff00)) { +@@ -1879,7 +1881,8 @@ void qla2x00_handle_rscn(scsi_qla_host_t + break; + case RSCN_DOM_ADDR: + list_for_each_entry(fcport, &vha->vp_fcports, list) { +- if (fcport->flags & FCF_FCP2_DEVICE) ++ if (fcport->flags & FCF_FCP2_DEVICE && ++ atomic_read(&fcport->state) == FCS_ONLINE) + continue; + + if ((ea->id.b24 & 0xff0000) == (fcport->d_id.b24 & 0xff0000)) { +@@ -1891,7 +1894,8 @@ void qla2x00_handle_rscn(scsi_qla_host_t + case RSCN_FAB_ADDR: + default: + list_for_each_entry(fcport, &vha->vp_fcports, list) { +- if (fcport->flags & FCF_FCP2_DEVICE) ++ if (fcport->flags & FCF_FCP2_DEVICE && ++ atomic_read(&fcport->state) == FCS_ONLINE) + continue; + + fcport->scan_needed = 1; diff --git a/queue-5.15/scsi-qla2xxx-fix-losing-target-when-it-reappears-during-delete.patch b/queue-5.15/scsi-qla2xxx-fix-losing-target-when-it-reappears-during-delete.patch new file mode 100644 index 00000000000..1b9537fc4fa --- /dev/null +++ 
b/queue-5.15/scsi-qla2xxx-fix-losing-target-when-it-reappears-during-delete.patch @@ -0,0 +1,79 @@ +From 118b0c863c8f5629cc5271fc24d72d926e0715d9 Mon Sep 17 00:00:00 2001 +From: Arun Easi +Date: Wed, 15 Jun 2022 22:35:04 -0700 +Subject: scsi: qla2xxx: Fix losing target when it reappears during delete + +From: Arun Easi + +commit 118b0c863c8f5629cc5271fc24d72d926e0715d9 upstream. + +FC target disappeared during port perturbation tests due to a race that +tramples target state. Fix the issue by adding state checks before +proceeding. + +Link: https://lore.kernel.org/r/20220616053508.27186-8-njavali@marvell.com +Fixes: 44c57f205876 ("scsi: qla2xxx: Changes to support FCP2 Target") +Cc: stable@vger.kernel.org +Signed-off-by: Arun Easi +Signed-off-by: Nilesh Javali +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_attr.c | 24 +++++++++++++++++------- + 1 file changed, 17 insertions(+), 7 deletions(-) + +--- a/drivers/scsi/qla2xxx/qla_attr.c ++++ b/drivers/scsi/qla2xxx/qla_attr.c +@@ -2705,17 +2705,24 @@ qla2x00_dev_loss_tmo_callbk(struct fc_rp + if (!fcport) + return; + +- /* Now that the rport has been deleted, set the fcport state to +- FCS_DEVICE_DEAD */ +- qla2x00_set_fcport_state(fcport, FCS_DEVICE_DEAD); ++ ++ /* ++ * Now that the rport has been deleted, set the fcport state to ++ * FCS_DEVICE_DEAD, if the fcport is still lost. ++ */ ++ if (fcport->scan_state != QLA_FCPORT_FOUND) ++ qla2x00_set_fcport_state(fcport, FCS_DEVICE_DEAD); + + /* + * Transport has effectively 'deleted' the rport, clear + * all local references. + */ + spin_lock_irqsave(host->host_lock, flags); +- fcport->rport = fcport->drport = NULL; +- *((fc_port_t **)rport->dd_data) = NULL; ++ /* Confirm port has not reappeared before clearing pointers. 
*/ ++ if (rport->port_state != FC_PORTSTATE_ONLINE) { ++ fcport->rport = fcport->drport = NULL; ++ *((fc_port_t **)rport->dd_data) = NULL; ++ } + spin_unlock_irqrestore(host->host_lock, flags); + + if (test_bit(ABORT_ISP_ACTIVE, &fcport->vha->dpc_flags)) +@@ -2748,9 +2755,12 @@ qla2x00_terminate_rport_io(struct fc_rpo + /* + * At this point all fcport's software-states are cleared. Perform any + * final cleanup of firmware resources (PCBs and XCBs). ++ * ++ * Attempt to cleanup only lost devices. + */ + if (fcport->loop_id != FC_NO_LOOP_ID) { +- if (IS_FWI2_CAPABLE(fcport->vha->hw)) { ++ if (IS_FWI2_CAPABLE(fcport->vha->hw) && ++ fcport->scan_state != QLA_FCPORT_FOUND) { + if (fcport->loop_id != FC_NO_LOOP_ID) + fcport->logout_on_delete = 1; + +@@ -2760,7 +2770,7 @@ qla2x00_terminate_rport_io(struct fc_rpo + __LINE__); + qlt_schedule_sess_for_deletion(fcport); + } +- } else { ++ } else if (!IS_FWI2_CAPABLE(fcport->vha->hw)) { + qla2x00_port_logout(fcport->vha, fcport); + } + } diff --git a/queue-5.15/scsi-qla2xxx-turn-off-multi-queue-for-8g-adapters.patch b/queue-5.15/scsi-qla2xxx-turn-off-multi-queue-for-8g-adapters.patch new file mode 100644 index 00000000000..bfad6bb9ad0 --- /dev/null +++ b/queue-5.15/scsi-qla2xxx-turn-off-multi-queue-for-8g-adapters.patch @@ -0,0 +1,61 @@ +From 5304673bdb1635e27555bd636fd5d6956f1cd552 Mon Sep 17 00:00:00 2001 +From: Quinn Tran +Date: Wed, 15 Jun 2022 22:35:01 -0700 +Subject: scsi: qla2xxx: Turn off multi-queue for 8G adapters + +From: Quinn Tran + +commit 5304673bdb1635e27555bd636fd5d6956f1cd552 upstream. + +For 8G adapters, multi-queue was enabled accidentally. Make sure +multi-queue is not enabled. + +Link: https://lore.kernel.org/r/20220616053508.27186-5-njavali@marvell.com +Cc: stable@vger.kernel.org +Signed-off-by: Quinn Tran +Signed-off-by: Nilesh Javali +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_def.h | 4 ++-- + drivers/scsi/qla2xxx/qla_isr.c | 16 ++++++---------- + 2 files changed, 8 insertions(+), 12 deletions(-) + +--- a/drivers/scsi/qla2xxx/qla_def.h ++++ b/drivers/scsi/qla2xxx/qla_def.h +@@ -4261,8 +4261,8 @@ struct qla_hw_data { + #define IS_OEM_001(ha) ((ha)->device_type & DT_OEM_001) + #define HAS_EXTENDED_IDS(ha) ((ha)->device_type & DT_EXTENDED_IDS) + #define IS_CT6_SUPPORTED(ha) ((ha)->device_type & DT_CT6_SUPPORTED) +-#define IS_MQUE_CAPABLE(ha) ((ha)->mqenable || IS_QLA83XX(ha) || \ +- IS_QLA27XX(ha) || IS_QLA28XX(ha)) ++#define IS_MQUE_CAPABLE(ha) (IS_QLA83XX(ha) || IS_QLA27XX(ha) || \ ++ IS_QLA28XX(ha)) + #define IS_BIDI_CAPABLE(ha) \ + (IS_QLA25XX(ha) || IS_QLA2031(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) + /* Bit 21 of fw_attributes decides the MCTP capabilities */ +--- a/drivers/scsi/qla2xxx/qla_isr.c ++++ b/drivers/scsi/qla2xxx/qla_isr.c +@@ -4415,16 +4415,12 @@ msix_register_fail: + } + + /* Enable MSI-X vector for response queue update for queue 0 */ +- if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) { +- if (ha->msixbase && ha->mqiobase && +- (ha->max_rsp_queues > 1 || ha->max_req_queues > 1 || +- ql2xmqsupport)) +- ha->mqenable = 1; +- } else +- if (ha->mqiobase && +- (ha->max_rsp_queues > 1 || ha->max_req_queues > 1 || +- ql2xmqsupport)) +- ha->mqenable = 1; ++ if (IS_MQUE_CAPABLE(ha) && ++ (ha->msixbase && ha->mqiobase && ha->max_qpairs)) ++ ha->mqenable = 1; ++ else ++ ha->mqenable = 0; ++ + ql_dbg(ql_dbg_multiq, vha, 0xc005, + "mqiobase=%p, max_rsp_queues=%d, max_req_queues=%d.\n", + ha->mqiobase, ha->max_rsp_queues, ha->max_req_queues); diff --git a/queue-5.15/scsi-qla2xxx-wind-down-adapter-after-pcie-error.patch b/queue-5.15/scsi-qla2xxx-wind-down-adapter-after-pcie-error.patch new file mode 100644 index 00000000000..a92e90a6873 --- /dev/null +++ b/queue-5.15/scsi-qla2xxx-wind-down-adapter-after-pcie-error.patch @@ -0,0 +1,199 @@ +From 
d3117c83ba316b3200d9f2fe900f2b9a5525a25c Mon Sep 17 00:00:00 2001 +From: Quinn Tran +Date: Wed, 15 Jun 2022 22:35:00 -0700 +Subject: scsi: qla2xxx: Wind down adapter after PCIe error + +From: Quinn Tran + +commit d3117c83ba316b3200d9f2fe900f2b9a5525a25c upstream. + +Put adapter into a wind down state if OS does not make any attempt to +recover the adapter after PCIe error. + +Link: https://lore.kernel.org/r/20220616053508.27186-4-njavali@marvell.com +Cc: stable@vger.kernel.org +Signed-off-by: Quinn Tran +Signed-off-by: Nilesh Javali +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_bsg.c | 10 +++++++- + drivers/scsi/qla2xxx/qla_def.h | 4 +++ + drivers/scsi/qla2xxx/qla_init.c | 20 ++++++++++++++++ + drivers/scsi/qla2xxx/qla_os.c | 48 ++++++++++++++++++++++++++++++++++++++++ + 4 files changed, 81 insertions(+), 1 deletion(-) + +--- a/drivers/scsi/qla2xxx/qla_bsg.c ++++ b/drivers/scsi/qla2xxx/qla_bsg.c +@@ -2972,6 +2972,13 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_ + + ql_log(ql_log_info, vha, 0x708b, "%s CMD timeout. 
bsg ptr %p.\n", + __func__, bsg_job); ++ ++ if (qla2x00_isp_reg_stat(ha)) { ++ ql_log(ql_log_info, vha, 0x9007, ++ "PCI/Register disconnect.\n"); ++ qla_pci_set_eeh_busy(vha); ++ } ++ + /* find the bsg job from the active list of commands */ + spin_lock_irqsave(&ha->hardware_lock, flags); + for (que = 0; que < ha->max_req_queues; que++) { +@@ -2989,7 +2996,8 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_ + sp->u.bsg_job == bsg_job) { + req->outstanding_cmds[cnt] = NULL; + spin_unlock_irqrestore(&ha->hardware_lock, flags); +- if (ha->isp_ops->abort_command(sp)) { ++ ++ if (!ha->flags.eeh_busy && ha->isp_ops->abort_command(sp)) { + ql_log(ql_log_warn, vha, 0x7089, + "mbx abort_command failed.\n"); + bsg_reply->result = -EIO; +--- a/drivers/scsi/qla2xxx/qla_def.h ++++ b/drivers/scsi/qla2xxx/qla_def.h +@@ -4045,6 +4045,9 @@ struct qla_hw_data { + uint32_t n2n_fw_acc_sec:1; + uint32_t plogi_template_valid:1; + uint32_t port_isolated:1; ++ uint32_t eeh_flush:2; ++#define EEH_FLUSH_RDY 1 ++#define EEH_FLUSH_DONE 2 + } flags; + + uint16_t max_exchg; +@@ -4079,6 +4082,7 @@ struct qla_hw_data { + uint32_t rsp_que_len; + uint32_t req_que_off; + uint32_t rsp_que_off; ++ unsigned long eeh_jif; + + /* Multi queue data structs */ + device_reg_t *mqiobase; +--- a/drivers/scsi/qla2xxx/qla_init.c ++++ b/drivers/scsi/qla2xxx/qla_init.c +@@ -47,6 +47,7 @@ qla2x00_sp_timeout(struct timer_list *t) + { + srb_t *sp = from_timer(sp, t, u.iocb_cmd.timer); + struct srb_iocb *iocb; ++ scsi_qla_host_t *vha = sp->vha; + + WARN_ON(irqs_disabled()); + iocb = &sp->u.iocb_cmd; +@@ -54,6 +55,12 @@ qla2x00_sp_timeout(struct timer_list *t) + + /* ref: TMR */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); ++ ++ if (vha && qla2x00_isp_reg_stat(vha->hw)) { ++ ql_log(ql_log_info, vha, 0x9008, ++ "PCI/Register disconnect.\n"); ++ qla_pci_set_eeh_busy(vha); ++ } + } + + void qla2x00_sp_free(srb_t *sp) +@@ -9731,6 +9738,12 @@ int qla2xxx_disable_port(struct Scsi_Hos + + vha->hw->flags.port_isolated = 1; + ++ if 
(qla2x00_isp_reg_stat(vha->hw)) { ++ ql_log(ql_log_info, vha, 0x9006, ++ "PCI/Register disconnect, exiting.\n"); ++ qla_pci_set_eeh_busy(vha); ++ return FAILED; ++ } + if (qla2x00_chip_is_down(vha)) + return 0; + +@@ -9746,6 +9759,13 @@ int qla2xxx_enable_port(struct Scsi_Host + { + scsi_qla_host_t *vha = shost_priv(host); + ++ if (qla2x00_isp_reg_stat(vha->hw)) { ++ ql_log(ql_log_info, vha, 0x9001, ++ "PCI/Register disconnect, exiting.\n"); ++ qla_pci_set_eeh_busy(vha); ++ return FAILED; ++ } ++ + vha->hw->flags.port_isolated = 0; + /* Set the flag to 1, so that isp_abort can proceed */ + vha->flags.online = 1; +--- a/drivers/scsi/qla2xxx/qla_os.c ++++ b/drivers/scsi/qla2xxx/qla_os.c +@@ -333,6 +333,11 @@ MODULE_PARM_DESC(ql2xabts_wait_nvme, + "To wait for ABTS response on I/O timeouts for NVMe. (default: 1)"); + + ++u32 ql2xdelay_before_pci_error_handling = 5; ++module_param(ql2xdelay_before_pci_error_handling, uint, 0644); ++MODULE_PARM_DESC(ql2xdelay_before_pci_error_handling, ++ "Number of seconds delayed before qla begin PCI error self-handling (default: 5).\n"); ++ + static void qla2x00_clear_drv_active(struct qla_hw_data *); + static void qla2x00_free_device(scsi_qla_host_t *); + static int qla2xxx_map_queues(struct Scsi_Host *shost); +@@ -7251,6 +7256,44 @@ static void qla_heart_beat(struct scsi_q + } + } + ++static void qla_wind_down_chip(scsi_qla_host_t *vha) ++{ ++ struct qla_hw_data *ha = vha->hw; ++ ++ if (!ha->flags.eeh_busy) ++ return; ++ if (ha->pci_error_state) ++ /* system is trying to recover */ ++ return; ++ ++ /* ++ * Current system is not handling PCIE error. At this point, this is ++ * best effort to wind down the adapter. 
++ */ ++ if (time_after_eq(jiffies, ha->eeh_jif + ql2xdelay_before_pci_error_handling * HZ) && ++ !ha->flags.eeh_flush) { ++ ql_log(ql_log_info, vha, 0x9009, ++ "PCI Error detected, attempting to reset hardware.\n"); ++ ++ ha->isp_ops->reset_chip(vha); ++ ha->isp_ops->disable_intrs(ha); ++ ++ ha->flags.eeh_flush = EEH_FLUSH_RDY; ++ ha->eeh_jif = jiffies; ++ ++ } else if (ha->flags.eeh_flush == EEH_FLUSH_RDY && ++ time_after_eq(jiffies, ha->eeh_jif + 5 * HZ)) { ++ pci_clear_master(ha->pdev); ++ ++ /* flush all command */ ++ qla2x00_abort_isp_cleanup(vha); ++ ha->flags.eeh_flush = EEH_FLUSH_DONE; ++ ++ ql_log(ql_log_info, vha, 0x900a, ++ "PCI Error handling complete, all IOs aborted.\n"); ++ } ++} ++ + /************************************************************************** + * qla2x00_timer + * +@@ -7274,6 +7317,8 @@ qla2x00_timer(struct timer_list *t) + fc_port_t *fcport = NULL; + + if (ha->flags.eeh_busy) { ++ qla_wind_down_chip(vha); ++ + ql_dbg(ql_dbg_timer, vha, 0x6000, + "EEH = %d, restarting timer.\n", + ha->flags.eeh_busy); +@@ -7854,6 +7899,9 @@ void qla_pci_set_eeh_busy(struct scsi_ql + + spin_lock_irqsave(&base_vha->work_lock, flags); + if (!ha->flags.eeh_busy) { ++ ha->eeh_jif = jiffies; ++ ha->flags.eeh_flush = 0; ++ + ha->flags.eeh_busy = 1; + do_cleanup = true; + } diff --git a/queue-5.15/scsi-ufs-core-correct-ufshcd_shutdown-flow.patch b/queue-5.15/scsi-ufs-core-correct-ufshcd_shutdown-flow.patch new file mode 100644 index 00000000000..cc14cca7600 --- /dev/null +++ b/queue-5.15/scsi-ufs-core-correct-ufshcd_shutdown-flow.patch @@ -0,0 +1,47 @@ +From 00511d2abf5708ad05dd5d1c36adb2468d274698 Mon Sep 17 00:00:00 2001 +From: Peter Wang +Date: Wed, 27 Jul 2022 11:05:26 +0800 +Subject: scsi: ufs: core: Correct ufshcd_shutdown() flow + +From: Peter Wang + +commit 00511d2abf5708ad05dd5d1c36adb2468d274698 upstream. + +After ufshcd_wl_shutdown() set device power off and link off, +ufshcd_shutdown() could turn off clock/power. 
Also remove +pm_runtime_get_sync. + +The reason why it is safe to remove pm_runtime_get_sync() is because: + + - ufshcd_wl_shutdown() -> pm_runtime_get_sync() will resume hba->dev too. + + - device resume(turn on clk/power) is not required, even if device is in + RPM_SUSPENDED. + +Link: https://lore.kernel.org/r/20220727030526.31022-1-peter.wang@mediatek.com +Fixes: b294ff3e3449 ("scsi: ufs: core: Enable power management for wlun") +Cc: # 5.15.x +Reviewed-by: Stanley Chu +Signed-off-by: Peter Wang +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/ufs/ufshcd.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +--- a/drivers/scsi/ufs/ufshcd.c ++++ b/drivers/scsi/ufs/ufshcd.c +@@ -9239,12 +9239,8 @@ EXPORT_SYMBOL(ufshcd_runtime_resume); + int ufshcd_shutdown(struct ufs_hba *hba) + { + if (ufshcd_is_ufs_dev_poweroff(hba) && ufshcd_is_link_off(hba)) +- goto out; ++ ufshcd_suspend(hba); + +- pm_runtime_get_sync(hba->dev); +- +- ufshcd_suspend(hba); +-out: + hba->is_powered = false; + /* allow force shutdown even in case of errors */ + return 0; diff --git a/queue-5.15/scsi-zfcp-fix-missing-auto-port-scan-and-thus-missing-target-ports.patch b/queue-5.15/scsi-zfcp-fix-missing-auto-port-scan-and-thus-missing-target-ports.patch new file mode 100644 index 00000000000..7c6385455af --- /dev/null +++ b/queue-5.15/scsi-zfcp-fix-missing-auto-port-scan-and-thus-missing-target-ports.patch @@ -0,0 +1,232 @@ +From 4da8c5f76825269f28d6a89fa752934a4bcb6dfa Mon Sep 17 00:00:00 2001 +From: Steffen Maier +Date: Fri, 29 Jul 2022 18:25:29 +0200 +Subject: scsi: zfcp: Fix missing auto port scan and thus missing target ports + +From: Steffen Maier + +commit 4da8c5f76825269f28d6a89fa752934a4bcb6dfa upstream. + +Case (1): + The only waiter on wka_port->completion_wq is zfcp_fc_wka_port_get() + trying to open a WKA port. As such it should only be woken up by WKA port + *open* responses, not by WKA port close responses. 
+ +Case (2): + A close WKA port response coming in just after having sent a new open WKA + port request and before blocking for the open response with wait_event() + in zfcp_fc_wka_port_get() erroneously renders the wait_event a NOP + because the close handler overwrites wka_port->status. Hence the + wait_event condition is erroneously true and it does not enter blocking + state. + +With non-negligible probability, the following time space sequence happens +depending on timing without this fix: + +user process ERP thread zfcp work queue tasklet system work queue +============ ========== =============== ======= ================= +$ echo 1 > online +zfcp_ccw_set_online +zfcp_ccw_activate +zfcp_erp_adapter_reopen +msleep scan backoff zfcp_erp_strategy +| ... +| zfcp_erp_action_cleanup +| ... +| queue delayed scan_work +| queue ns_up_work +| ns_up_work: +| zfcp_fc_wka_port_get +| open wka request +| open response +| GSPN FC-GS +| RSPN FC-GS [NPIV-only] +| zfcp_fc_wka_port_put +| (--wka->refcount==0) +| sched delayed wka->work +| +~~~Case (1)~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +zfcp_erp_wait +flush scan_work +| wka->work: +| wka->status=CLOSING +| close wka request +| scan_work: +| zfcp_fc_wka_port_get +| (wka->status==CLOSING) +| wka->status=OPENING +| open wka request +| wait_event +| | close response +| | wka->status=OFFLINE +| | wake_up /*WRONG*/ +~~~Case (2)~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +| wka->work: +| wka->status=CLOSING +| close wka request +zfcp_erp_wait +flush scan_work +| scan_work: +| zfcp_fc_wka_port_get +| (wka->status==CLOSING) +| wka->status=OPENING +| open wka request +| close response +| wka->status=OFFLINE +| wake_up /*WRONG&NOP*/ +| wait_event /*NOP*/ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +| (wka->status!=ONLINE) +| return -EIO +| return early + open response + wka->status=ONLINE + wake_up /*NOP*/ + +So we erroneously end up with no automatic port 
scan. This is a big problem +when it happens during boot. The timing is influenced by v3.19 commit +18f87a67e6d6 ("zfcp: auto port scan resiliency"). + +Fix it by fully mutually excluding zfcp_fc_wka_port_get() and +zfcp_fc_wka_port_offline(). For that to work, we make the latter block +until we get the response for a close WKA port. In order not to penalize +the system workqueue, we move wka_port->work to our own adapter workqueue. +Note that before v2.6.30 commit 828bc1212a68 ("[SCSI] zfcp: Set WKA-port to +offline on adapter deactivation"), zfcp did block in +zfcp_fc_wka_port_offline() as well, but with a different condition. + +While at it, make non-functional cleanups to improve code reading in +zfcp_fc_wka_port_get(). If we cannot send the WKA port open request, don't +rely on the subsequent wait_event condition to immediately let this case +pass without blocking. Also, we don't want to rely on the additional condition +handling the refcount to be skipped just to finally return with -EIO. + +Link: https://lore.kernel.org/r/20220729162529.1620730-1-maier@linux.ibm.com +Fixes: 5ab944f97e09 ("[SCSI] zfcp: attach and release SAN nameserver port on demand") +Cc: #v2.6.28+ +Reviewed-by: Benjamin Block +Signed-off-by: Steffen Maier +Signed-off-by: Martin K.
Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/s390/scsi/zfcp_fc.c | 29 ++++++++++++++++++++--------- + drivers/s390/scsi/zfcp_fc.h | 6 ++++-- + drivers/s390/scsi/zfcp_fsf.c | 4 ++-- + 3 files changed, 26 insertions(+), 13 deletions(-) + +--- a/drivers/s390/scsi/zfcp_fc.c ++++ b/drivers/s390/scsi/zfcp_fc.c +@@ -145,27 +145,33 @@ void zfcp_fc_enqueue_event(struct zfcp_a + + static int zfcp_fc_wka_port_get(struct zfcp_fc_wka_port *wka_port) + { ++ int ret = -EIO; ++ + if (mutex_lock_interruptible(&wka_port->mutex)) + return -ERESTARTSYS; + + if (wka_port->status == ZFCP_FC_WKA_PORT_OFFLINE || + wka_port->status == ZFCP_FC_WKA_PORT_CLOSING) { + wka_port->status = ZFCP_FC_WKA_PORT_OPENING; +- if (zfcp_fsf_open_wka_port(wka_port)) ++ if (zfcp_fsf_open_wka_port(wka_port)) { ++ /* could not even send request, nothing to wait for */ + wka_port->status = ZFCP_FC_WKA_PORT_OFFLINE; ++ goto out; ++ } + } + +- mutex_unlock(&wka_port->mutex); +- +- wait_event(wka_port->completion_wq, ++ wait_event(wka_port->opened, + wka_port->status == ZFCP_FC_WKA_PORT_ONLINE || + wka_port->status == ZFCP_FC_WKA_PORT_OFFLINE); + + if (wka_port->status == ZFCP_FC_WKA_PORT_ONLINE) { + atomic_inc(&wka_port->refcount); +- return 0; ++ ret = 0; ++ goto out; + } +- return -EIO; ++out: ++ mutex_unlock(&wka_port->mutex); ++ return ret; + } + + static void zfcp_fc_wka_port_offline(struct work_struct *work) +@@ -181,9 +187,12 @@ static void zfcp_fc_wka_port_offline(str + + wka_port->status = ZFCP_FC_WKA_PORT_CLOSING; + if (zfcp_fsf_close_wka_port(wka_port)) { ++ /* could not even send request, nothing to wait for */ + wka_port->status = ZFCP_FC_WKA_PORT_OFFLINE; +- wake_up(&wka_port->completion_wq); ++ goto out; + } ++ wait_event(wka_port->closed, ++ wka_port->status == ZFCP_FC_WKA_PORT_OFFLINE); + out: + mutex_unlock(&wka_port->mutex); + } +@@ -193,13 +202,15 @@ static void zfcp_fc_wka_port_put(struct + if (atomic_dec_return(&wka_port->refcount) != 0) + return; + /* wait 10 milliseconds, 
other reqs might pop in */ +- schedule_delayed_work(&wka_port->work, HZ / 100); ++ queue_delayed_work(wka_port->adapter->work_queue, &wka_port->work, ++ msecs_to_jiffies(10)); + } + + static void zfcp_fc_wka_port_init(struct zfcp_fc_wka_port *wka_port, u32 d_id, + struct zfcp_adapter *adapter) + { +- init_waitqueue_head(&wka_port->completion_wq); ++ init_waitqueue_head(&wka_port->opened); ++ init_waitqueue_head(&wka_port->closed); + + wka_port->adapter = adapter; + wka_port->d_id = d_id; +--- a/drivers/s390/scsi/zfcp_fc.h ++++ b/drivers/s390/scsi/zfcp_fc.h +@@ -185,7 +185,8 @@ enum zfcp_fc_wka_status { + /** + * struct zfcp_fc_wka_port - representation of well-known-address (WKA) FC port + * @adapter: Pointer to adapter structure this WKA port belongs to +- * @completion_wq: Wait for completion of open/close command ++ * @opened: Wait for completion of open command ++ * @closed: Wait for completion of close command + * @status: Current status of WKA port + * @refcount: Reference count to keep port open as long as it is in use + * @d_id: FC destination id or well-known-address +@@ -195,7 +196,8 @@ enum zfcp_fc_wka_status { + */ + struct zfcp_fc_wka_port { + struct zfcp_adapter *adapter; +- wait_queue_head_t completion_wq; ++ wait_queue_head_t opened; ++ wait_queue_head_t closed; + enum zfcp_fc_wka_status status; + atomic_t refcount; + u32 d_id; +--- a/drivers/s390/scsi/zfcp_fsf.c ++++ b/drivers/s390/scsi/zfcp_fsf.c +@@ -1907,7 +1907,7 @@ static void zfcp_fsf_open_wka_port_handl + wka_port->status = ZFCP_FC_WKA_PORT_ONLINE; + } + out: +- wake_up(&wka_port->completion_wq); ++ wake_up(&wka_port->opened); + } + + /** +@@ -1966,7 +1966,7 @@ static void zfcp_fsf_close_wka_port_hand + } + + wka_port->status = ZFCP_FC_WKA_PORT_OFFLINE; +- wake_up(&wka_port->completion_wq); ++ wake_up(&wka_port->closed); + } + + /** diff --git a/queue-5.15/series b/queue-5.15/series index e9327ca9da2..f99f8f021dc 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -671,3 +671,15 @@ 
f2fs-do-not-allow-to-decompress-files-have-fi_compre.patch video-fbdev-vt8623fb-check-the-size-of-screen-before.patch video-fbdev-arkfb-check-the-size-of-screen-before-me.patch video-fbdev-s3fb-check-the-size-of-screen-before-mem.patch +scsi-ufs-core-correct-ufshcd_shutdown-flow.patch +scsi-zfcp-fix-missing-auto-port-scan-and-thus-missing-target-ports.patch +scsi-qla2xxx-fix-imbalance-vha-vref_count.patch +scsi-qla2xxx-fix-discovery-issues-in-fc-al-topology.patch +scsi-qla2xxx-turn-off-multi-queue-for-8g-adapters.patch +scsi-qla2xxx-fix-crash-due-to-stale-srb-access-around-i-o-timeouts.patch +scsi-qla2xxx-fix-excessive-i-o-error-messages-by-default.patch +scsi-qla2xxx-fix-erroneous-mailbox-timeout-after-pci-error-injection.patch +scsi-qla2xxx-wind-down-adapter-after-pcie-error.patch +scsi-qla2xxx-fix-losing-fcp-2-targets-on-long-port-disable-with-i-os.patch +scsi-qla2xxx-fix-losing-target-when-it-reappears-during-delete.patch +scsi-qla2xxx-fix-losing-fcp-2-targets-during-port-perturbation-tests.patch -- 2.47.3