git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
scsi: lpfc: Fix reusing an ndlp that is marked NLP_DROPPED during FLOGI
author Justin Tee <justin.tee@broadcom.com>
Thu, 6 Nov 2025 22:46:36 +0000 (14:46 -0800)
committer Martin K. Petersen <martin.petersen@oracle.com>
Sat, 8 Nov 2025 18:18:00 +0000 (13:18 -0500)
It's possible for an unstable link to repeatedly bounce, allowing a FLOGI
retry, but then bounce again, forcing an abort of the FLOGI.  Ensure that
the initial reference count on the FLOGI ndlp is restored in this faulty
link scenario.

Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Link: https://patch.msgid.link/20251106224639.139176-8-justintee8345@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/lpfc/lpfc_els.c
drivers/scsi/lpfc/lpfc_hbadisc.c

index 00cfd4ac4ccd2788a3799baac979acea114cbfaf..0045c1e296197955df0418b5f5993360f23eb1f0 100644 (file)
@@ -934,10 +934,15 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
        /* Check to see if link went down during discovery */
        if (lpfc_els_chk_latt(vport)) {
                /* One additional decrement on node reference count to
-                * trigger the release of the node
+                * trigger the release of the node.  Make sure the ndlp
+                * is marked NLP_DROPPED.
                 */
-               if (!(ndlp->fc4_xpt_flags & SCSI_XPT_REGD))
+               if (!test_bit(NLP_IN_DEV_LOSS, &ndlp->nlp_flag) &&
+                   !test_bit(NLP_DROPPED, &ndlp->nlp_flag) &&
+                   !(ndlp->fc4_xpt_flags & SCSI_XPT_REGD)) {
+                       set_bit(NLP_DROPPED, &ndlp->nlp_flag);
                        lpfc_nlp_put(ndlp);
+               }
                goto out;
        }
 
@@ -995,9 +1000,10 @@ stop_rr_fcf_flogi:
                                        IOERR_LOOP_OPEN_FAILURE)))
                        lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS,
                                      "2858 FLOGI Status:x%x/x%x TMO"
-                                     ":x%x Data x%lx x%x\n",
+                                     ":x%x Data x%lx x%x x%lx x%x\n",
                                      ulp_status, ulp_word4, tmo,
-                                     phba->hba_flag, phba->fcf.fcf_flag);
+                                     phba->hba_flag, phba->fcf.fcf_flag,
+                                     ndlp->nlp_flag, ndlp->fc4_xpt_flags);
 
                /* Check for retry */
                if (lpfc_els_retry(phba, cmdiocb, rspiocb)) {
@@ -1015,14 +1021,17 @@ stop_rr_fcf_flogi:
                 * reference to trigger node release.
                 */
                if (!test_bit(NLP_IN_DEV_LOSS, &ndlp->nlp_flag) &&
-                   !(ndlp->fc4_xpt_flags & SCSI_XPT_REGD))
+                   !test_bit(NLP_DROPPED, &ndlp->nlp_flag) &&
+                   !(ndlp->fc4_xpt_flags & SCSI_XPT_REGD)) {
+                       set_bit(NLP_DROPPED, &ndlp->nlp_flag);
                        lpfc_nlp_put(ndlp);
+               }
 
                lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS,
                                 "0150 FLOGI Status:x%x/x%x "
-                                "xri x%x TMO:x%x refcnt %d\n",
+                                "xri x%x iotag x%x TMO:x%x refcnt %d\n",
                                 ulp_status, ulp_word4, cmdiocb->sli4_xritag,
-                                tmo, kref_read(&ndlp->kref));
+                                cmdiocb->iotag, tmo, kref_read(&ndlp->kref));
 
                /* If this is not a loop open failure, bail out */
                if (!(ulp_status == IOSTAT_LOCAL_REJECT &&
@@ -1279,6 +1288,19 @@ lpfc_issue_els_flogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
        uint32_t tmo, did;
        int rc;
 
+       /* It's possible for lpfc to reissue a FLOGI on an ndlp that is marked
+        * NLP_DROPPED.  This happens when the FLOGI completed with the XB bit
+        * set causing lpfc to reference the ndlp until the XRI_ABORTED CQE is
+        * issued. The time window for the XRI_ABORTED CQE can be as much as
+        * 2*2*RA_TOV allowing for ndlp reuse of this type when the link is
+        * cycling quickly.  When true, restore the initial reference and remove
+        * the NLP_DROPPED flag as lpfc is retrying.
+        */
+       if (test_and_clear_bit(NLP_DROPPED, &ndlp->nlp_flag)) {
+               if (!lpfc_nlp_get(ndlp))
+                       return 1;
+       }
+
        cmdsize = (sizeof(uint32_t) + sizeof(struct serv_parm));
        elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp,
                                     ndlp->nlp_DID, ELS_CMD_FLOGI);
index 43d246c5c049cb3a7a7cf0e24e3fca17f769d6c2..717ae56c8e4bd9b024e71889aa543c554cca6e8f 100644 (file)
@@ -424,6 +424,7 @@ lpfc_check_nlp_post_devloss(struct lpfc_vport *vport,
                            struct lpfc_nodelist *ndlp)
 {
        if (test_and_clear_bit(NLP_IN_RECOV_POST_DEV_LOSS, &ndlp->save_flags)) {
+               clear_bit(NLP_DROPPED, &ndlp->nlp_flag);
                lpfc_nlp_get(ndlp);
                lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY | LOG_NODE,
                                 "8438 Devloss timeout reversed on DID x%x "
@@ -566,7 +567,8 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
                        return fcf_inuse;
                }
 
-               lpfc_nlp_put(ndlp);
+               if (!test_and_set_bit(NLP_DROPPED, &ndlp->nlp_flag))
+                       lpfc_nlp_put(ndlp);
                return fcf_inuse;
        }