]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
scsi: lpfc: Revise ndlp kref handling for dev_loss_tmo_callbk and lpfc_drop_node
authorJustin Tee <justin.tee@broadcom.com>
Wed, 12 Jul 2023 18:05:15 +0000 (11:05 -0700)
committerMartin K. Petersen <martin.petersen@oracle.com>
Sun, 23 Jul 2023 20:17:07 +0000 (16:17 -0400)
The ndlp kref count implementation in lpfc_dev_loss_tmo_callbk() removes
the initial node reference when a vport is unloading.  When lpfc_cleanup()
sends a DEVICE_RM event and is in NPR state, the driver calls
lpfc_drop_node().  Subsequently, lpfc_drop_node() also removes an ndlp kref
thinking it is the initial reference.  This unintentionally introduces an
extra kref decrement on the ndlp object.

Fix by using the NLP_DROPPED node flag in lpfc_dev_loss_tmo_callbk() and
lpfc_drop_node() to coordinate the removal of the initial node reference.

In lpfc_dev_loss_tmo_callbk(), remove the SCSI transport reference provided
the node is registered in the dev_loss context because the driver cannot
call the SCSI transport in dev_loss context or afterwards.  And, have
lpfc_drop_node() not remove a reference if another thread is acting or has
already acted on it.

Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Link: https://lore.kernel.org/r/20230712180522.112722-6-justintee8345@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/lpfc/lpfc_hbadisc.c
drivers/scsi/lpfc/lpfc_nvme.c

index b4303254744ab97324665b1b135a976364f59f52..388a481c811824cd34c74daff635665ce2454b22 100644 (file)
@@ -169,29 +169,44 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
 
        lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE,
                         "3181 dev_loss_callbk x%06x, rport x%px flg x%x "
-                        "load_flag x%x refcnt %d state %d xpt x%x\n",
+                        "load_flag x%x refcnt %u state %d xpt x%x\n",
                         ndlp->nlp_DID, ndlp->rport, ndlp->nlp_flag,
                         vport->load_flag, kref_read(&ndlp->kref),
                         ndlp->nlp_state, ndlp->fc4_xpt_flags);
 
-       /* Don't schedule a worker thread event if the vport is going down.
-        * The teardown process cleans up the node via lpfc_drop_node.
-        */
+       /* Don't schedule a worker thread event if the vport is going down. */
        if (vport->load_flag & FC_UNLOADING) {
-               ((struct lpfc_rport_data *)rport->dd_data)->pnode = NULL;
+               spin_lock_irqsave(&ndlp->lock, iflags);
                ndlp->rport = NULL;
 
-               ndlp->fc4_xpt_flags &= ~SCSI_XPT_REGD;
-               /* clear the NLP_XPT_REGD if the node is not registered
-                * with nvme-fc
+               /* The scsi_transport is done with the rport so lpfc cannot
+                * call to unregister. Remove the scsi transport reference
+                * and clean up the SCSI transport node details.
                 */
-               if (ndlp->fc4_xpt_flags == NLP_XPT_REGD)
-                       ndlp->fc4_xpt_flags &= ~NLP_XPT_REGD;
+               if (ndlp->fc4_xpt_flags & (NLP_XPT_REGD | SCSI_XPT_REGD)) {
+                       ndlp->fc4_xpt_flags &= ~SCSI_XPT_REGD;
 
-               /* Remove the node reference from remote_port_add now.
-                * The driver will not call remote_port_delete.
+                       /* NVME transport-registered rports need the
+                        * NLP_XPT_REGD flag to complete an unregister.
+                        */
+                       if (!(ndlp->fc4_xpt_flags & NVME_XPT_REGD))
+                               ndlp->fc4_xpt_flags &= ~NLP_XPT_REGD;
+                       spin_unlock_irqrestore(&ndlp->lock, iflags);
+                       lpfc_nlp_put(ndlp);
+                       spin_lock_irqsave(&ndlp->lock, iflags);
+               }
+
+               /* Only 1 thread can drop the initial node reference.  If
+                * another thread has set NLP_DROPPED, this thread is done.
                 */
-               lpfc_nlp_put(ndlp);
+               if (!(ndlp->nlp_flag & NLP_DROPPED)) {
+                       ndlp->nlp_flag |= NLP_DROPPED;
+                       spin_unlock_irqrestore(&ndlp->lock, iflags);
+                       lpfc_nlp_put(ndlp);
+                       spin_lock_irqsave(&ndlp->lock, iflags);
+               }
+
+               spin_unlock_irqrestore(&ndlp->lock, iflags);
                return;
        }
 
@@ -4686,7 +4701,8 @@ lpfc_nlp_unreg_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
        spin_lock_irqsave(&ndlp->lock, iflags);
        if (!(ndlp->fc4_xpt_flags & NLP_XPT_REGD)) {
                spin_unlock_irqrestore(&ndlp->lock, iflags);
-               lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI,
+               lpfc_printf_vlog(vport, KERN_INFO,
+                                LOG_ELS | LOG_NODE | LOG_DISCOVERY,
                                 "0999 %s Not regd: ndlp x%px rport x%px DID "
                                 "x%x FLG x%x XPT x%x\n",
                                  __func__, ndlp, ndlp->rport, ndlp->nlp_DID,
@@ -4702,9 +4718,10 @@ lpfc_nlp_unreg_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                vport->phba->nport_event_cnt++;
                lpfc_unregister_remote_port(ndlp);
        } else if (!ndlp->rport) {
-               lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI,
+               lpfc_printf_vlog(vport, KERN_INFO,
+                                LOG_ELS | LOG_NODE | LOG_DISCOVERY,
                                 "1999 %s NDLP in devloss x%px DID x%x FLG x%x"
-                                " XPT x%x refcnt %d\n",
+                                " XPT x%x refcnt %u\n",
                                 __func__, ndlp, ndlp->nlp_DID, ndlp->nlp_flag,
                                 ndlp->fc4_xpt_flags,
                                 kref_read(&ndlp->kref));
@@ -4954,22 +4971,29 @@ lpfc_drop_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 {
        /*
         * Use of lpfc_drop_node and UNUSED list: lpfc_drop_node should
-        * be used if we wish to issue the "last" lpfc_nlp_put() to remove
-        * the ndlp from the vport. The ndlp marked as UNUSED on the list
-        * until ALL other outstanding threads have completed. We check
-        * that the ndlp not already in the UNUSED state before we proceed.
+        * be used when lpfc wants to remove the "last" lpfc_nlp_put() to
+        * release the ndlp from the vport when conditions are correct.
         */
        if (ndlp->nlp_state == NLP_STE_UNUSED_NODE)
                return;
        lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE);
-       ndlp->nlp_flag |= NLP_DROPPED;
        if (vport->phba->sli_rev == LPFC_SLI_REV4) {
                lpfc_cleanup_vports_rrqs(vport, ndlp);
                lpfc_unreg_rpi(vport, ndlp);
        }
 
-       lpfc_nlp_put(ndlp);
-       return;
+       /* NLP_DROPPED means another thread already removed the initial
+        * reference from lpfc_nlp_init.  If set, don't drop it again and
+        * introduce an imbalance.
+        */
+       spin_lock_irq(&ndlp->lock);
+       if (!(ndlp->nlp_flag & NLP_DROPPED)) {
+               ndlp->nlp_flag |= NLP_DROPPED;
+               spin_unlock_irq(&ndlp->lock);
+               lpfc_nlp_put(ndlp);
+               return;
+       }
+       spin_unlock_irq(&ndlp->lock);
 }
 
 /*
index 3ee5cde481f3112792dbca594871d42a3b4dc437..39acbcb7ec66a12e246ed3f92699fa5afe430c7e 100644 (file)
@@ -2503,8 +2503,9 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
                lpfc_printf_vlog(vport, KERN_ERR,
                                 LOG_TRACE_EVENT,
                                 "6031 RemotePort Registration failed "
-                                "err: %d, DID x%06x\n",
-                                ret, ndlp->nlp_DID);
+                                "err: %d, DID x%06x ref %u\n",
+                                ret, ndlp->nlp_DID, kref_read(&ndlp->kref));
+               lpfc_nlp_put(ndlp);
        }
 
        return ret;