git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
svcrdma: Tail iovec leaves an orphaned DMA mapping
author Chuck Lever <chuck.lever@oracle.com>
Tue, 13 Sep 2016 14:52:50 +0000 (10:52 -0400)
committer Ben Hutchings <ben@decadent.org.uk>
Thu, 23 Feb 2017 03:54:03 +0000 (03:54 +0000)
commit cace564f8b6260e806f5e28d7f192fd0e0c603ed upstream.

The ctxt's count field is overloaded to mean the number of pages in
the ctxt->page array and the number of SGEs in the ctxt->sge array.
Typically these two numbers are the same.

However, when an inline RPC reply is constructed from an xdr_buf
with a tail iovec, the head and tail often occupy the same page,
but each is DMA mapped independently. In that case, ->count equals
the number of pages, but it does not equal the number of SGEs.
There's one more SGE, for the tail iovec. Hence there is one more
DMA mapping than there are pages in the ctxt->page array.

This isn't a real problem until the server's iommu is enabled. Then
each RPC reply that has content in that iovec orphans a DMA mapping
that consists of real resources.

krb5i and krb5p always populate that tail iovec. After a couple
million sent krb5i/p RPC replies, the NFS server starts behaving
erratically. Reboot is needed to clear the problem.

Fixes: 9d11b51ce7c1 ("svcrdma: Fix send_reply() scatter/gather set-up")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
[bwh: Backported to 3.16:
 - Adjust context
 - Drop changes to svc_rdma_bc_sendto()
 - s/xprt->sc_pd->local_dma_lkey/xprt->sc_dma_lkey/]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
include/linux/sunrpc/svc_rdma.h
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
net/sunrpc/xprtrdma/svc_rdma_sendto.c
net/sunrpc/xprtrdma/svc_rdma_transport.c

index 5cf99a0163681ec2a1bee5c6a97171e82f361c53..45a69411dafc8e4d7a0cc7fb20d631ff4c11b670 100644 (file)
@@ -83,6 +83,7 @@ struct svc_rdma_op_ctxt {
        unsigned long flags;
        enum dma_data_direction direction;
        int count;
+       unsigned int mapped_sges;
        struct ib_sge sge[RPCSVC_MAXPAGES];
        struct page *pages[RPCSVC_MAXPAGES];
 };
@@ -178,6 +179,14 @@ struct svcxprt_rdma {
 #define RPCRDMA_MAX_REQUESTS    16
 #define RPCRDMA_MAX_REQ_SIZE    4096
 
+/* Track DMA maps for this transport and context */
+static inline void svc_rdma_count_mappings(struct svcxprt_rdma *rdma,
+                                          struct svc_rdma_op_ctxt *ctxt)
+{
+       ctxt->mapped_sges++;
+       atomic_inc(&rdma->sc_dma_used);
+}
+
 /* svc_rdma_marshal.c */
 extern void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *,
                                      int *, int *);
index 1b6929583a347a67c3f1b25704d30ed1e0e98162..e15e9fa6a071b5e6b03e1d040b7b9edaae6e6664 100644 (file)
@@ -178,7 +178,7 @@ static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
                                           ctxt->sge[pno].addr);
                if (ret)
                        goto err;
-               atomic_inc(&xprt->sc_dma_used);
+               svc_rdma_count_mappings(xprt, ctxt);
 
                /* The lkey here is either a local dma lkey or a dma_mr lkey */
                ctxt->sge[pno].lkey = xprt->sc_dma_lkey;
index 0cf9f439025e3c5db528042dbefa5f71e7bfe79e..f3cfabe598e1f4bceea114e5201d1d1739c6595b 100644 (file)
@@ -184,7 +184,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
                if (ib_dma_mapping_error(xprt->sc_cm_id->device,
                                         sge[sge_no].addr))
                        goto err;
-               atomic_inc(&xprt->sc_dma_used);
+               svc_rdma_count_mappings(xprt, ctxt);
                sge[sge_no].lkey = xprt->sc_dma_lkey;
                ctxt->count++;
                sge_off = 0;
@@ -411,7 +411,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
                            ctxt->sge[0].length, DMA_TO_DEVICE);
        if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
                goto err;
-       atomic_inc(&rdma->sc_dma_used);
+       svc_rdma_count_mappings(rdma, ctxt);
 
        ctxt->direction = DMA_TO_DEVICE;
 
@@ -427,7 +427,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
                if (ib_dma_mapping_error(rdma->sc_cm_id->device,
                                         ctxt->sge[sge_no].addr))
                        goto err;
-               atomic_inc(&rdma->sc_dma_used);
+               svc_rdma_count_mappings(rdma, ctxt);
                ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
                ctxt->sge[sge_no].length = sge_bytes;
        }
@@ -442,23 +442,9 @@ static int send_reply(struct svcxprt_rdma *rdma,
                ctxt->pages[page_no+1] = rqstp->rq_respages[page_no];
                ctxt->count++;
                rqstp->rq_respages[page_no] = NULL;
-               /*
-                * If there are more pages than SGE, terminate SGE
-                * list so that svc_rdma_unmap_dma doesn't attempt to
-                * unmap garbage.
-                */
-               if (page_no+1 >= sge_no)
-                       ctxt->sge[page_no+1].length = 0;
        }
        rqstp->rq_next_page = rqstp->rq_respages + 1;
 
-       /* The loop above bumps sc_dma_used for each sge. The
-        * xdr_buf.tail gets a separate sge, but resides in the
-        * same page as xdr_buf.head. Don't count it twice.
-        */
-       if (sge_no > ctxt->count)
-               atomic_dec(&rdma->sc_dma_used);
-
        BUG_ON(sge_no > rdma->sc_max_sge);
        memset(&send_wr, 0, sizeof send_wr);
        ctxt->wr_op = IB_WR_SEND;
index 06a5d9235107cb1cf92efd507b391e1adc677cc9..2f67c5ee9caf16f0a09a653c99857ccf2d2c4ceb 100644 (file)
@@ -108,6 +108,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
        ctxt->xprt = xprt;
        INIT_LIST_HEAD(&ctxt->dto_q);
        ctxt->count = 0;
+       ctxt->mapped_sges = 0;
        ctxt->frmr = NULL;
        atomic_inc(&xprt->sc_ctxt_used);
        return ctxt;
@@ -116,22 +117,27 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
 void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
 {
        struct svcxprt_rdma *xprt = ctxt->xprt;
-       int i;
-       for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
+       struct ib_device *device = xprt->sc_cm_id->device;
+       u32 lkey = xprt->sc_dma_lkey;
+       unsigned int i, count;
+
+       for (count = 0, i = 0; i < ctxt->mapped_sges; i++) {
                /*
                 * Unmap the DMA addr in the SGE if the lkey matches
                 * the sc_dma_lkey, otherwise, ignore it since it is
                 * an FRMR lkey and will be unmapped later when the
                 * last WR that uses it completes.
                 */
-               if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
-                       atomic_dec(&xprt->sc_dma_used);
-                       ib_dma_unmap_page(xprt->sc_cm_id->device,
+               if (ctxt->sge[i].lkey == lkey) {
+                       count++;
+                       ib_dma_unmap_page(device,
                                            ctxt->sge[i].addr,
                                            ctxt->sge[i].length,
                                            ctxt->direction);
                }
        }
+       ctxt->mapped_sges = 0;
+       atomic_sub(count, &xprt->sc_dma_used);
 }
 
 void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
@@ -521,7 +527,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
                                     DMA_FROM_DEVICE);
                if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
                        goto err_put_ctxt;
-               atomic_inc(&xprt->sc_dma_used);
+               svc_rdma_count_mappings(xprt, ctxt);
                ctxt->sge[sge_no].addr = pa;
                ctxt->sge[sge_no].length = PAGE_SIZE;
                ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
@@ -1346,7 +1352,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
                svc_rdma_put_context(ctxt, 1);
                return;
        }
-       atomic_inc(&xprt->sc_dma_used);
+       svc_rdma_count_mappings(xprt, ctxt);
        ctxt->sge[0].lkey = xprt->sc_dma_lkey;
        ctxt->sge[0].length = length;