git.ipfire.org Git - people/arne_f/kernel.git/commitdiff
xprtrdma: Fix corner cases when handling device removal
author: Chuck Lever <chuck.lever@oracle.com>
Mon, 19 Mar 2018 18:23:16 +0000 (14:23 -0400)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 24 Apr 2018 07:43:00 +0000 (09:43 +0200)
commit 25524288631fc5b7d33259fca1e0dc38146be5d6 upstream.

Michal Kalderon has found some corner cases around device unload
with active NFS mounts that I didn't have the imagination to test
when xprtrdma device removal was added last year.

- The ULP device removal handler is responsible for deallocating
  the PD. That wasn't clear to me initially, and my own testing
  suggested it was not necessary, but that is incorrect.

- The transport destruction path can no longer assume that there
  is a valid ID.

- When destroying a transport, ensure that ib_free_cq() is not
  invoked on a CQ that was already released.

Reported-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Fixes: bebd031866ca ("xprtrdma: Support unplugging an HCA from ...")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: stable@vger.kernel.org # v4.12+
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
net/sunrpc/xprtrdma/verbs.c

index e6f84a6434a049e41d83092a0764668a21416d4c..25b0ecbd37e29e78d2611963769ee6dc1b0a9052 100644 (file)
@@ -250,7 +250,6 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
                wait_for_completion(&ia->ri_remove_done);
 
                ia->ri_id = NULL;
-               ia->ri_pd = NULL;
                ia->ri_device = NULL;
                /* Return 1 to ensure the core destroys the id. */
                return 1;
@@ -445,7 +444,9 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
                ia->ri_id->qp = NULL;
        }
        ib_free_cq(ep->rep_attr.recv_cq);
+       ep->rep_attr.recv_cq = NULL;
        ib_free_cq(ep->rep_attr.send_cq);
+       ep->rep_attr.send_cq = NULL;
 
        /* The ULP is responsible for ensuring all DMA
         * mappings and MRs are gone.
@@ -458,6 +459,8 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
                rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
        }
        rpcrdma_mrs_destroy(buf);
+       ib_dealloc_pd(ia->ri_pd);
+       ia->ri_pd = NULL;
 
        /* Allow waiters to continue */
        complete(&ia->ri_remove_done);
@@ -628,14 +631,16 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
 {
        cancel_delayed_work_sync(&ep->rep_connect_worker);
 
-       if (ia->ri_id->qp) {
+       if (ia->ri_id && ia->ri_id->qp) {
                rpcrdma_ep_disconnect(ep, ia);
                rdma_destroy_qp(ia->ri_id);
                ia->ri_id->qp = NULL;
        }
 
-       ib_free_cq(ep->rep_attr.recv_cq);
-       ib_free_cq(ep->rep_attr.send_cq);
+       if (ep->rep_attr.recv_cq)
+               ib_free_cq(ep->rep_attr.recv_cq);
+       if (ep->rep_attr.send_cq)
+               ib_free_cq(ep->rep_attr.send_cq);
 }
 
 /* Re-establish a connection after a device removal event.