From 883468946585f539f21e419137401cd632c191ce Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 1 Apr 2024 12:31:34 +0200 Subject: [PATCH] 5.10-stable patches added patches: scsi-qla2xxx-delay-i-o-abort-on-pci-error.patch scsi-qla2xxx-fix-command-flush-on-cable-pull.patch scsi-qla2xxx-split-fce-eft-trace-control.patch --- ...qla2xxx-delay-i-o-abort-on-pci-error.patch | 57 +++++ ...2xxx-fix-command-flush-on-cable-pull.patch | 94 ++++++++ ...-qla2xxx-split-fce-eft-trace-control.patch | 224 ++++++++++++++++++ queue-5.10/series | 3 + 4 files changed, 378 insertions(+) create mode 100644 queue-5.10/scsi-qla2xxx-delay-i-o-abort-on-pci-error.patch create mode 100644 queue-5.10/scsi-qla2xxx-fix-command-flush-on-cable-pull.patch create mode 100644 queue-5.10/scsi-qla2xxx-split-fce-eft-trace-control.patch diff --git a/queue-5.10/scsi-qla2xxx-delay-i-o-abort-on-pci-error.patch b/queue-5.10/scsi-qla2xxx-delay-i-o-abort-on-pci-error.patch new file mode 100644 index 00000000000..2aaa6cb537f --- /dev/null +++ b/queue-5.10/scsi-qla2xxx-delay-i-o-abort-on-pci-error.patch @@ -0,0 +1,57 @@ +From 591c1fdf2016d118b8fbde427b796fac13f3f070 Mon Sep 17 00:00:00 2001 +From: Quinn Tran +Date: Tue, 27 Feb 2024 22:11:26 +0530 +Subject: scsi: qla2xxx: Delay I/O Abort on PCI error + +From: Quinn Tran + +commit 591c1fdf2016d118b8fbde427b796fac13f3f070 upstream. + +Currently when PCI error is detected, I/O is aborted manually through the +ABORT IOCB mechanism which is not guaranteed to succeed. + +Instead, wait for the OS or system to notify driver to wind down I/O +through the pci_error_handlers api. Set eeh_busy flag to pause all traffic +and wait for I/O to drain. + +Cc: stable@vger.kernel.org +Signed-off-by: Quinn Tran +Signed-off-by: Nilesh Javali +Link: https://lore.kernel.org/r/20240227164127.36465-11-njavali@marvell.com +Reviewed-by: Himanshu Madhani +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_attr.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +--- a/drivers/scsi/qla2xxx/qla_attr.c ++++ b/drivers/scsi/qla2xxx/qla_attr.c +@@ -2689,7 +2689,13 @@ qla2x00_dev_loss_tmo_callbk(struct fc_rp + return; + + if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) { +- qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16); ++ /* Will wait for wind down of adapter */ ++ ql_dbg(ql_dbg_aer, fcport->vha, 0x900c, ++ "%s pci offline detected (id %06x)\n", __func__, ++ fcport->d_id.b24); ++ qla_pci_set_eeh_busy(fcport->vha); ++ qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24, ++ 0, WAIT_TARGET); + return; + } + } +@@ -2711,7 +2717,11 @@ qla2x00_terminate_rport_io(struct fc_rpo + vha = fcport->vha; + + if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) { +- qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16); ++ /* Will wait for wind down of adapter */ ++ ql_dbg(ql_dbg_aer, fcport->vha, 0x900b, ++ "%s pci offline detected (id %06x)\n", __func__, ++ fcport->d_id.b24); ++ qla_pci_set_eeh_busy(vha); + qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24, + 0, WAIT_TARGET); + return; diff --git a/queue-5.10/scsi-qla2xxx-fix-command-flush-on-cable-pull.patch b/queue-5.10/scsi-qla2xxx-fix-command-flush-on-cable-pull.patch new file mode 100644 index 00000000000..1d4e9417f9f --- /dev/null +++ b/queue-5.10/scsi-qla2xxx-fix-command-flush-on-cable-pull.patch @@ -0,0 +1,94 @@ +From a27d4d0e7de305def8a5098a614053be208d1aa1 Mon Sep 17 00:00:00 2001 +From: Quinn Tran +Date: Tue, 27 Feb 2024 22:11:22 +0530 +Subject: scsi: qla2xxx: Fix command flush on cable pull + +From: Quinn Tran + +commit a27d4d0e7de305def8a5098a614053be208d1aa1 upstream. + +System crash due to command failed to flush back to SCSI layer. + + BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 + PGD 0 P4D 0 + Oops: 0000 [#1] SMP NOPTI + CPU: 27 PID: 793455 Comm: kworker/u130:6 Kdump: loaded Tainted: G OE --------- - - 4.18.0-372.9.1.el8.x86_64 #1 + Hardware name: HPE ProLiant DL360 Gen10/ProLiant DL360 Gen10, BIOS U32 09/03/2021 + Workqueue: nvme-wq nvme_fc_connect_ctrl_work [nvme_fc] + RIP: 0010:__wake_up_common+0x4c/0x190 + Code: 24 10 4d 85 c9 74 0a 41 f6 01 04 0f 85 9d 00 00 00 48 8b 43 08 48 83 c3 08 4c 8d 48 e8 49 8d 41 18 48 39 c3 0f 84 f0 00 00 00 <49> 8b 41 18 89 54 24 08 31 ed 4c 8d 70 e8 45 8b 29 41 f6 c5 04 75 + RSP: 0018:ffff95f3e0cb7cd0 EFLAGS: 00010086 + RAX: 0000000000000000 RBX: ffff8b08d3b26328 RCX: 0000000000000000 + RDX: 0000000000000001 RSI: 0000000000000003 RDI: ffff8b08d3b26320 + RBP: 0000000000000001 R08: 0000000000000000 R09: ffffffffffffffe8 + R10: 0000000000000000 R11: ffff95f3e0cb7a60 R12: ffff95f3e0cb7d20 + R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000000 + FS: 0000000000000000(0000) GS:ffff8b2fdf6c0000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000000000000000 CR3: 0000002f1e410002 CR4: 00000000007706e0 + DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 + DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 + PKRU: 55555554 + Call Trace: + __wake_up_common_lock+0x7c/0xc0 + qla_nvme_ls_req+0x355/0x4c0 [qla2xxx] + qla2xxx [0000:12:00.1]-f084:3: qlt_free_session_done: se_sess 0000000000000000 / sess ffff8ae1407ca000 from port 21:32:00:02:ac:07:ee:b8 loop_id 0x02 s_id 01:02:00 logout 1 keep 0 els_logo 0 + ? __nvme_fc_send_ls_req+0x260/0x380 [nvme_fc] + qla2xxx [0000:12:00.1]-207d:3: FCPort 21:32:00:02:ac:07:ee:b8 state transitioned from ONLINE to LOST - portid=010200. + ? nvme_fc_send_ls_req.constprop.42+0x1a/0x45 [nvme_fc] + qla2xxx [0000:12:00.1]-2109:3: qla2x00_schedule_rport_del 21320002ac07eeb8. rport ffff8ae598122000 roles 1 + ? nvme_fc_connect_ctrl_work.cold.63+0x1e3/0xa7d [nvme_fc] + qla2xxx [0000:12:00.1]-f084:3: qlt_free_session_done: se_sess 0000000000000000 / sess ffff8ae14801e000 from port 21:32:01:02:ad:f7:ee:b8 loop_id 0x04 s_id 01:02:01 logout 1 keep 0 els_logo 0 + ? __switch_to+0x10c/0x450 + ? process_one_work+0x1a7/0x360 + qla2xxx [0000:12:00.1]-207d:3: FCPort 21:32:01:02:ad:f7:ee:b8 state transitioned from ONLINE to LOST - portid=010201. + ? worker_thread+0x1ce/0x390 + ? create_worker+0x1a0/0x1a0 + qla2xxx [0000:12:00.1]-2109:3: qla2x00_schedule_rport_del 21320102adf7eeb8. rport ffff8ae3b2312800 roles 70 + ? kthread+0x10a/0x120 + qla2xxx [0000:12:00.1]-2112:3: qla_nvme_unregister_remote_port: unregister remoteport on ffff8ae14801e000 21320102adf7eeb8 + ? set_kthread_struct+0x40/0x40 + qla2xxx [0000:12:00.1]-2110:3: remoteport_delete of ffff8ae14801e000 21320102adf7eeb8 completed. + ? ret_from_fork+0x1f/0x40 + qla2xxx [0000:12:00.1]-f086:3: qlt_free_session_done: waiting for sess ffff8ae14801e000 logout + +The system was under memory stress where driver was not able to allocate an +SRB to carry out error recovery of cable pull. The failure to flush causes +upper layer to start modifying scsi_cmnd. When the system frees up some +memory, the subsequent cable pull trigger another command flush. At this +point the driver access a null pointer when attempting to DMA unmap the +SGL. + +Add a check to make sure commands are flush back on session tear down to +prevent the null pointer access. + +Cc: stable@vger.kernel.org +Signed-off-by: Quinn Tran +Signed-off-by: Nilesh Javali +Link: https://lore.kernel.org/r/20240227164127.36465-7-njavali@marvell.com +Reviewed-by: Himanshu Madhani +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_target.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/drivers/scsi/qla2xxx/qla_target.c ++++ b/drivers/scsi/qla2xxx/qla_target.c +@@ -1038,6 +1038,16 @@ void qlt_free_session_done(struct work_s + "%s: sess %p logout completed\n", __func__, sess); + } + ++ /* check for any straggling io left behind */ ++ if (!(sess->flags & FCF_FCP2_DEVICE) && ++ qla2x00_eh_wait_for_pending_commands(sess->vha, sess->d_id.b24, 0, WAIT_TARGET)) { ++ ql_log(ql_log_warn, vha, 0x3027, ++ "IO not return. Resetting.\n"); ++ set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); ++ qla2xxx_wake_dpc(vha); ++ qla2x00_wait_for_chip_reset(vha); ++ } ++ + if (sess->logo_ack_needed) { + sess->logo_ack_needed = 0; + qla24xx_async_notify_ack(vha, sess, diff --git a/queue-5.10/scsi-qla2xxx-split-fce-eft-trace-control.patch b/queue-5.10/scsi-qla2xxx-split-fce-eft-trace-control.patch new file mode 100644 index 00000000000..f4e52af032f --- /dev/null +++ b/queue-5.10/scsi-qla2xxx-split-fce-eft-trace-control.patch @@ -0,0 +1,224 @@ +From 76a192e1a566e15365704b9f8fb3b70825f85064 Mon Sep 17 00:00:00 2001 +From: Quinn Tran +Date: Tue, 27 Feb 2024 22:11:19 +0530 +Subject: scsi: qla2xxx: Split FCE|EFT trace control + +From: Quinn Tran + +commit 76a192e1a566e15365704b9f8fb3b70825f85064 upstream. + +Current code combines the allocation of FCE|EFT trace buffers and enables +the features all in 1 step. + +Split this step into separate steps in preparation for follow-on patch to +allow user to have a choice to enable / disable FCE trace feature. + +Cc: stable@vger.kernel.org +Reported-by: kernel test robot +Signed-off-by: Quinn Tran +Signed-off-by: Nilesh Javali +Link: https://lore.kernel.org/r/20240227164127.36465-4-njavali@marvell.com +Reviewed-by: Himanshu Madhani +Signed-off-by: Martin K. Petersen +Signed-off-by: Greg Kroah-Hartman +--- + drivers/scsi/qla2xxx/qla_init.c | 102 ++++++++++++++++------------------------ + 1 file changed, 41 insertions(+), 61 deletions(-) + +--- a/drivers/scsi/qla2xxx/qla_init.c ++++ b/drivers/scsi/qla2xxx/qla_init.c +@@ -2280,6 +2280,40 @@ exit: + return rval; + } + ++static void qla_enable_fce_trace(scsi_qla_host_t *vha) ++{ ++ int rval; ++ struct qla_hw_data *ha = vha->hw; ++ ++ if (ha->fce) { ++ ha->flags.fce_enabled = 1; ++ memset(ha->fce, 0, fce_calc_size(ha->fce_bufs)); ++ rval = qla2x00_enable_fce_trace(vha, ++ ha->fce_dma, ha->fce_bufs, ha->fce_mb, &ha->fce_bufs); ++ ++ if (rval) { ++ ql_log(ql_log_warn, vha, 0x8033, ++ "Unable to reinitialize FCE (%d).\n", rval); ++ ha->flags.fce_enabled = 0; ++ } ++ } ++} ++ ++static void qla_enable_eft_trace(scsi_qla_host_t *vha) ++{ ++ int rval; ++ struct qla_hw_data *ha = vha->hw; ++ ++ if (ha->eft) { ++ memset(ha->eft, 0, EFT_SIZE); ++ rval = qla2x00_enable_eft_trace(vha, ha->eft_dma, EFT_NUM_BUFFERS); ++ ++ if (rval) { ++ ql_log(ql_log_warn, vha, 0x8034, ++ "Unable to reinitialize EFT (%d).\n", rval); ++ } ++ } ++} + /* + * qla2x00_initialize_adapter + * Initialize board. +@@ -3230,9 +3264,8 @@ qla24xx_chip_diag(scsi_qla_host_t *vha) + } + + static void +-qla2x00_init_fce_trace(scsi_qla_host_t *vha) ++qla2x00_alloc_fce_trace(scsi_qla_host_t *vha) + { +- int rval; + dma_addr_t tc_dma; + void *tc; + struct qla_hw_data *ha = vha->hw; +@@ -3261,27 +3294,17 @@ qla2x00_init_fce_trace(scsi_qla_host_t * + return; + } + +- rval = qla2x00_enable_fce_trace(vha, tc_dma, FCE_NUM_BUFFERS, +- ha->fce_mb, &ha->fce_bufs); +- if (rval) { +- ql_log(ql_log_warn, vha, 0x00bf, +- "Unable to initialize FCE (%d).\n", rval); +- dma_free_coherent(&ha->pdev->dev, FCE_SIZE, tc, tc_dma); +- return; +- } +- + ql_dbg(ql_dbg_init, vha, 0x00c0, + "Allocated (%d KB) for FCE...\n", FCE_SIZE / 1024); + +- ha->flags.fce_enabled = 1; + ha->fce_dma = tc_dma; + ha->fce = tc; ++ ha->fce_bufs = FCE_NUM_BUFFERS; + } + + static void +-qla2x00_init_eft_trace(scsi_qla_host_t *vha) ++qla2x00_alloc_eft_trace(scsi_qla_host_t *vha) + { +- int rval; + dma_addr_t tc_dma; + void *tc; + struct qla_hw_data *ha = vha->hw; +@@ -3306,14 +3329,6 @@ qla2x00_init_eft_trace(scsi_qla_host_t * + return; + } + +- rval = qla2x00_enable_eft_trace(vha, tc_dma, EFT_NUM_BUFFERS); +- if (rval) { +- ql_log(ql_log_warn, vha, 0x00c2, +- "Unable to initialize EFT (%d).\n", rval); +- dma_free_coherent(&ha->pdev->dev, EFT_SIZE, tc, tc_dma); +- return; +- } +- + ql_dbg(ql_dbg_init, vha, 0x00c3, + "Allocated (%d KB) EFT ...\n", EFT_SIZE / 1024); + +@@ -3321,13 +3336,6 @@ qla2x00_init_eft_trace(scsi_qla_host_t * + ha->eft = tc; + } + +-static void +-qla2x00_alloc_offload_mem(scsi_qla_host_t *vha) +-{ +- qla2x00_init_fce_trace(vha); +- qla2x00_init_eft_trace(vha); +-} +- + void + qla2x00_alloc_fw_dump(scsi_qla_host_t *vha) + { +@@ -3382,10 +3390,10 @@ qla2x00_alloc_fw_dump(scsi_qla_host_t *v + if (ha->tgt.atio_ring) + mq_size += ha->tgt.atio_q_length * sizeof(request_t); + +- qla2x00_init_fce_trace(vha); ++ qla2x00_alloc_fce_trace(vha); + if (ha->fce) + fce_size = sizeof(struct qla2xxx_fce_chain) + FCE_SIZE; +- qla2x00_init_eft_trace(vha); ++ qla2x00_alloc_eft_trace(vha); + if (ha->eft) + eft_size = EFT_SIZE; + } +@@ -3784,7 +3792,6 @@ qla2x00_setup_chip(scsi_qla_host_t *vha) + struct qla_hw_data *ha = vha->hw; + struct device_reg_2xxx __iomem *reg = &ha->iobase->isp; + unsigned long flags; +- uint16_t fw_major_version; + int done_once = 0; + + if (IS_P3P_TYPE(ha)) { +@@ -3851,7 +3858,6 @@ execute_fw_with_lr: + goto failed; + + enable_82xx_npiv: +- fw_major_version = ha->fw_major_version; + if (IS_P3P_TYPE(ha)) + qla82xx_check_md_needed(vha); + else +@@ -3880,12 +3886,11 @@ enable_82xx_npiv: + if (rval != QLA_SUCCESS) + goto failed; + +- if (!fw_major_version && !(IS_P3P_TYPE(ha))) +- qla2x00_alloc_offload_mem(vha); +- + if (ql2xallocfwdump && !(IS_P3P_TYPE(ha))) + qla2x00_alloc_fw_dump(vha); + ++ qla_enable_fce_trace(vha); ++ qla_enable_eft_trace(vha); + } else { + goto failed; + } +@@ -7012,7 +7017,6 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_ + int + qla2x00_abort_isp(scsi_qla_host_t *vha) + { +- int rval; + uint8_t status = 0; + struct qla_hw_data *ha = vha->hw; + struct scsi_qla_host *vp; +@@ -7100,31 +7104,7 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) + + if (IS_QLA81XX(ha) || IS_QLA8031(ha)) + qla2x00_get_fw_version(vha); +- if (ha->fce) { +- ha->flags.fce_enabled = 1; +- memset(ha->fce, 0, +- fce_calc_size(ha->fce_bufs)); +- rval = qla2x00_enable_fce_trace(vha, +- ha->fce_dma, ha->fce_bufs, ha->fce_mb, +- &ha->fce_bufs); +- if (rval) { +- ql_log(ql_log_warn, vha, 0x8033, +- "Unable to reinitialize FCE " +- "(%d).\n", rval); +- ha->flags.fce_enabled = 0; +- } +- } + +- if (ha->eft) { +- memset(ha->eft, 0, EFT_SIZE); +- rval = qla2x00_enable_eft_trace(vha, +- ha->eft_dma, EFT_NUM_BUFFERS); +- if (rval) { +- ql_log(ql_log_warn, vha, 0x8034, +- "Unable to reinitialize EFT " +- "(%d).\n", rval); +- } +- } + } else { /* failed the ISP abort */ + vha->flags.online = 1; + if (test_bit(ISP_ABORT_RETRY, &vha->dpc_flags)) { diff --git a/queue-5.10/series b/queue-5.10/series index ee707a4bfec..8212c209fad 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -173,3 +173,6 @@ usb-udc-remove-warning-when-queue-disabled-ep.patch usb-typec-ucsi-check-for-notifications-after-init.patch usb-typec-ucsi-ack-unsupported-commands.patch usb-typec-ucsi-clear-ucsi_cci_reset_complete-before-reset.patch +scsi-qla2xxx-split-fce-eft-trace-control.patch +scsi-qla2xxx-fix-command-flush-on-cable-pull.patch +scsi-qla2xxx-delay-i-o-abort-on-pci-error.patch -- 2.47.3