]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
xprtrdma: Fix list corruption / DMAR errors during MR recovery
authorChuck Lever <chuck.lever@oracle.com>
Tue, 1 May 2018 15:37:14 +0000 (11:37 -0400)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 20 Jun 2018 19:01:32 +0000 (04:01 +0900)
[ Upstream commit 054f155721d7af1f343ed52bea246626d8450ca8 ]

The ro_release_mr methods check whether mr->mr_list is empty.
Therefore, be sure to always use list_del_init when removing an MR
linked into a list using that field. Otherwise, when recovering from
transport failures or device removal, list corruption can result, or
MRs can get mapped or unmapped an odd number of times, resulting in
IOMMU-related failures.

In general this fix is appropriate back to v4.8. However, code
changes since then make it impossible to apply this patch directly
to stable kernels. The fix would have to be applied by hand or
reworked for kernels earlier than v4.16.

Backport guidance -- there are several cases:
- When creating an MR, initialize mr_list so that using list_empty
  on an as-yet-unused MR is safe.
- When an MR is being handled by the remote invalidation path,
  ensure that mr_list is reinitialized when it is removed from
  rl_registered.
- When an MR is being handled by rpcrdma_destroy_mrs, it is removed
  from mr_all, but it may still be on an rl_registered list. In
  that case, the MR needs to be removed from that list before being
  released.
- Other cases are covered by using list_del_init in rpcrdma_mr_pop.

Fixes: 9d6b04097882 ('xprtrdma: Place registered MWs on a ... ')
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
net/sunrpc/xprtrdma/fmr_ops.c
net/sunrpc/xprtrdma/frwr_ops.c
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtrdma/xprt_rdma.h

index d5f95bb39300e61ac2e3e4f9ddd83e947135473f..5679b5374dfb80e91f14a42994d2e532db0bf298 100644 (file)
@@ -72,6 +72,7 @@ fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
        if (IS_ERR(mr->fmr.fm_mr))
                goto out_fmr_err;
 
+       INIT_LIST_HEAD(&mr->mr_list);
        return 0;
 
 out_fmr_err:
@@ -102,10 +103,6 @@ fmr_op_release_mr(struct rpcrdma_mr *mr)
        LIST_HEAD(unmap_list);
        int rc;
 
-       /* Ensure MW is not on any rl_registered list */
-       if (!list_empty(&mr->mr_list))
-               list_del(&mr->mr_list);
-
        kfree(mr->fmr.fm_physaddrs);
        kfree(mr->mr_sg);
 
index 90f688f19783d089fe90fffc40f104bb6e43a94c..4d11dc5190b8c77a4a88b6f6bd81f293113e36b7 100644 (file)
@@ -110,6 +110,7 @@ frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
        if (!mr->mr_sg)
                goto out_list_err;
 
+       INIT_LIST_HEAD(&mr->mr_list);
        sg_init_table(mr->mr_sg, depth);
        init_completion(&frwr->fr_linv_done);
        return 0;
@@ -133,10 +134,6 @@ frwr_op_release_mr(struct rpcrdma_mr *mr)
 {
        int rc;
 
-       /* Ensure MR is not on any rl_registered list */
-       if (!list_empty(&mr->mr_list))
-               list_del(&mr->mr_list);
-
        rc = ib_dereg_mr(mr->frwr.fr_mr);
        if (rc)
                pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
@@ -195,7 +192,7 @@ frwr_op_recover_mr(struct rpcrdma_mr *mr)
        return;
 
 out_release:
-       pr_err("rpcrdma: FRWR reset failed %d, %p release\n", rc, mr);
+       pr_err("rpcrdma: FRWR reset failed %d, %p released\n", rc, mr);
        r_xprt->rx_stats.mrs_orphaned++;
 
        spin_lock(&r_xprt->rx_buf.rb_mrlock);
@@ -458,7 +455,7 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
 
        list_for_each_entry(mr, mrs, mr_list)
                if (mr->mr_handle == rep->rr_inv_rkey) {
-                       list_del(&mr->mr_list);
+                       list_del_init(&mr->mr_list);
                        trace_xprtrdma_remoteinv(mr);
                        mr->frwr.fr_state = FRWR_IS_INVALID;
                        rpcrdma_mr_unmap_and_put(mr);
index 25b0ecbd37e29e78d2611963769ee6dc1b0a9052..20ad7bc1021c19963d1a6937b92bf5210d7ad8ef 100644 (file)
@@ -1244,6 +1244,11 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
                list_del(&mr->mr_all);
 
                spin_unlock(&buf->rb_mrlock);
+
+               /* Ensure MW is not on any rl_registered list */
+               if (!list_empty(&mr->mr_list))
+                       list_del(&mr->mr_list);
+
                ia->ri_ops->ro_release_mr(mr);
                count++;
                spin_lock(&buf->rb_mrlock);
index 430a6de8300e50849514bc22e6c110c93fed3675..99c96bf33fce766fa8ab35e05315906279dd7ff4 100644 (file)
@@ -381,7 +381,7 @@ rpcrdma_mr_pop(struct list_head *list)
        struct rpcrdma_mr *mr;
 
        mr = list_first_entry(list, struct rpcrdma_mr, mr_list);
-       list_del(&mr->mr_list);
+       list_del_init(&mr->mr_list);
        return mr;
 }