git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
NFSD: Implement large extent array support in pNFS
author Sergey Bashirov <sergeybashirov@gmail.com>
Mon, 20 Oct 2025 12:53:03 +0000 (08:53 -0400)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 23 Oct 2025 14:20:41 +0000 (16:20 +0200)
[ Upstream commit f963cf2b91a30b5614c514f3ad53ca124cb65280 ]

When a pNFS client in block or SCSI layout mode sends a layoutcommit
to the MDS, a variable-length array of modified extents is supplied
within the request. This patch allows the server to accept such extent
arrays even when they do not fit within a single memory page.

The issue can be reproduced by writing to a 1GB file using FIO with
O_DIRECT, a 4K block size, and a large I/O depth, without preallocating
the file. In this case, the server returns NFSERR_BADXDR to the client.

Co-developed-by: Konstantin Evtushenko <koevtushenko@yandex.com>
Signed-off-by: Konstantin Evtushenko <koevtushenko@yandex.com>
Signed-off-by: Sergey Bashirov <sergeybashirov@gmail.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Stable-dep-of: d68886bae76a ("NFSD: Fix last write offset handling in layoutcommit")
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
fs/nfsd/blocklayout.c
fs/nfsd/blocklayoutxdr.c
fs/nfsd/blocklayoutxdr.h
fs/nfsd/nfs4proc.c
fs/nfsd/nfs4xdr.c
fs/nfsd/pnfs.h
fs/nfsd/xdr4.h

index 19078a043e85c51a58764a2b94e190b47dd3ae1d..4c936132eb44035216c791720597594961495094 100644 (file)
@@ -173,16 +173,18 @@ nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
 }
 
 static __be32
-nfsd4_block_proc_layoutcommit(struct inode *inode,
+nfsd4_block_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp,
                struct nfsd4_layoutcommit *lcp)
 {
        struct iomap *iomaps;
        int nr_iomaps;
        __be32 nfserr;
 
-       nfserr = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
-                       lcp->lc_up_len, &iomaps, &nr_iomaps,
-                       i_blocksize(inode));
+       rqstp->rq_arg = lcp->lc_up_layout;
+       svcxdr_init_decode(rqstp);
+
+       nfserr = nfsd4_block_decode_layoutupdate(&rqstp->rq_arg_stream,
+                       &iomaps, &nr_iomaps, i_blocksize(inode));
        if (nfserr != nfs_ok)
                return nfserr;
 
@@ -313,16 +315,18 @@ nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb,
        return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp));
 }
 static __be32
-nfsd4_scsi_proc_layoutcommit(struct inode *inode,
+nfsd4_scsi_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp,
                struct nfsd4_layoutcommit *lcp)
 {
        struct iomap *iomaps;
        int nr_iomaps;
        __be32 nfserr;
 
-       nfserr = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout,
-                       lcp->lc_up_len, &iomaps, &nr_iomaps,
-                       i_blocksize(inode));
+       rqstp->rq_arg = lcp->lc_up_layout;
+       svcxdr_init_decode(rqstp);
+
+       nfserr = nfsd4_scsi_decode_layoutupdate(&rqstp->rq_arg_stream,
+                       &iomaps, &nr_iomaps, i_blocksize(inode));
        if (nfserr != nfs_ok)
                return nfserr;
 
index 18de37ff289166d2cb7b2bf61d290cfd450bcc2e..e50afe34073719413fbbd9de36de05440ab762ad 100644 (file)
@@ -113,8 +113,7 @@ nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
 
 /**
  * nfsd4_block_decode_layoutupdate - decode the block layout extent array
- * @p: pointer to the xdr data
- * @len: number of bytes to decode
+ * @xdr: subbuf set to the encoded array
  * @iomapp: pointer to store the decoded extent array
  * @nr_iomapsp: pointer to store the number of extents
  * @block_size: alignment of extent offset and length
@@ -127,25 +126,24 @@ nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
  *
  * Return values:
  *   %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid
- *   %nfserr_bad_xdr: The encoded array in @p is invalid
+ *   %nfserr_bad_xdr: The encoded array in @xdr is invalid
  *   %nfserr_inval: An unaligned extent found
  *   %nfserr_delay: Failed to allocate memory for @iomapp
  */
 __be32
-nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
+nfsd4_block_decode_layoutupdate(struct xdr_stream *xdr, struct iomap **iomapp,
                int *nr_iomapsp, u32 block_size)
 {
        struct iomap *iomaps;
-       u32 nr_iomaps, i;
+       u32 nr_iomaps, expected, len, i;
+       __be32 nfserr;
 
-       if (len < sizeof(u32))
-               return nfserr_bad_xdr;
-       len -= sizeof(u32);
-       if (len % PNFS_BLOCK_EXTENT_SIZE)
+       if (xdr_stream_decode_u32(xdr, &nr_iomaps))
                return nfserr_bad_xdr;
 
-       nr_iomaps = be32_to_cpup(p++);
-       if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE)
+       len = sizeof(__be32) + xdr_stream_remaining(xdr);
+       expected = sizeof(__be32) + nr_iomaps * PNFS_BLOCK_EXTENT_SIZE;
+       if (len != expected)
                return nfserr_bad_xdr;
 
        iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL);
@@ -155,21 +153,44 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
        for (i = 0; i < nr_iomaps; i++) {
                struct pnfs_block_extent bex;
 
-               p = svcxdr_decode_deviceid4(p, &bex.vol_id);
-               p = xdr_decode_hyper(p, &bex.foff);
+               if (nfsd4_decode_deviceid4(xdr, &bex.vol_id)) {
+                       nfserr = nfserr_bad_xdr;
+                       goto fail;
+               }
+
+               if (xdr_stream_decode_u64(xdr, &bex.foff)) {
+                       nfserr = nfserr_bad_xdr;
+                       goto fail;
+               }
                if (bex.foff & (block_size - 1)) {
+                       nfserr = nfserr_inval;
+                       goto fail;
+               }
+
+               if (xdr_stream_decode_u64(xdr, &bex.len)) {
+                       nfserr = nfserr_bad_xdr;
                        goto fail;
                }
-               p = xdr_decode_hyper(p, &bex.len);
                if (bex.len & (block_size - 1)) {
+                       nfserr = nfserr_inval;
+                       goto fail;
+               }
+
+               if (xdr_stream_decode_u64(xdr, &bex.soff)) {
+                       nfserr = nfserr_bad_xdr;
                        goto fail;
                }
-               p = xdr_decode_hyper(p, &bex.soff);
                if (bex.soff & (block_size - 1)) {
+                       nfserr = nfserr_inval;
+                       goto fail;
+               }
+
+               if (xdr_stream_decode_u32(xdr, &bex.es)) {
+                       nfserr = nfserr_bad_xdr;
                        goto fail;
                }
-               bex.es = be32_to_cpup(p++);
                if (bex.es != PNFS_BLOCK_READWRITE_DATA) {
+                       nfserr = nfserr_inval;
                        goto fail;
                }
 
@@ -182,13 +203,12 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
        return nfs_ok;
 fail:
        kfree(iomaps);
-       return nfserr_inval;
+       return nfserr;
 }
 
 /**
  * nfsd4_scsi_decode_layoutupdate - decode the scsi layout extent array
- * @p: pointer to the xdr data
- * @len: number of bytes to decode
+ * @xdr: subbuf set to the encoded array
  * @iomapp: pointer to store the decoded extent array
  * @nr_iomapsp: pointer to store the number of extents
  * @block_size: alignment of extent offset and length
@@ -200,21 +220,22 @@ fail:
  *
  * Return values:
  *   %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid
- *   %nfserr_bad_xdr: The encoded array in @p is invalid
+ *   %nfserr_bad_xdr: The encoded array in @xdr is invalid
  *   %nfserr_inval: An unaligned extent found
  *   %nfserr_delay: Failed to allocate memory for @iomapp
  */
 __be32
-nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
+nfsd4_scsi_decode_layoutupdate(struct xdr_stream *xdr, struct iomap **iomapp,
                int *nr_iomapsp, u32 block_size)
 {
        struct iomap *iomaps;
-       u32 nr_iomaps, expected, i;
+       u32 nr_iomaps, expected, len, i;
+       __be32 nfserr;
 
-       if (len < sizeof(u32))
+       if (xdr_stream_decode_u32(xdr, &nr_iomaps))
                return nfserr_bad_xdr;
 
-       nr_iomaps = be32_to_cpup(p++);
+       len = sizeof(__be32) + xdr_stream_remaining(xdr);
        expected = sizeof(__be32) + nr_iomaps * PNFS_SCSI_RANGE_SIZE;
        if (len != expected)
                return nfserr_bad_xdr;
@@ -226,14 +247,22 @@ nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
        for (i = 0; i < nr_iomaps; i++) {
                u64 val;
 
-               p = xdr_decode_hyper(p, &val);
+               if (xdr_stream_decode_u64(xdr, &val)) {
+                       nfserr = nfserr_bad_xdr;
+                       goto fail;
+               }
                if (val & (block_size - 1)) {
+                       nfserr = nfserr_inval;
                        goto fail;
                }
                iomaps[i].offset = val;
 
-               p = xdr_decode_hyper(p, &val);
+               if (xdr_stream_decode_u64(xdr, &val)) {
+                       nfserr = nfserr_bad_xdr;
+                       goto fail;
+               }
                if (val & (block_size - 1)) {
+                       nfserr = nfserr_inval;
                        goto fail;
                }
                iomaps[i].length = val;
@@ -244,5 +273,5 @@ nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
        return nfs_ok;
 fail:
        kfree(iomaps);
-       return nfserr_inval;
+       return nfserr;
 }
index 15b3569f3d9ad36834a0c8bd52f6aea2ccaec2ba..7d25ef689671f7056e20cb2dd02bb794b128741a 100644 (file)
@@ -54,9 +54,9 @@ __be32 nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
                const struct nfsd4_getdeviceinfo *gdp);
 __be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr,
                const struct nfsd4_layoutget *lgp);
-__be32 nfsd4_block_decode_layoutupdate(__be32 *p, u32 len,
+__be32 nfsd4_block_decode_layoutupdate(struct xdr_stream *xdr,
                struct iomap **iomapp, int *nr_iomapsp, u32 block_size);
-__be32 nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len,
+__be32 nfsd4_scsi_decode_layoutupdate(struct xdr_stream *xdr,
                struct iomap **iomapp, int *nr_iomapsp, u32 block_size);
 
 #endif /* _NFSD_BLOCKLAYOUTXDR_H */
index c01183ddc93f277f920f829ab35f5b8394143df1..294fede450a0126f9a4246298ce57671c89d6340 100644 (file)
@@ -2407,7 +2407,7 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp,
                lcp->lc_size_chg = false;
        }
 
-       nfserr = ops->proc_layoutcommit(inode, lcp);
+       nfserr = ops->proc_layoutcommit(inode, rqstp, lcp);
        nfs4_put_stid(&ls->ls_stid);
 out:
        return nfserr;
index bd5c8720ea7e365defdceb105a6f5e3073295d6c..66383eeeed15a0eed27cfa1f9c4140e5a006c1f1 100644 (file)
@@ -571,6 +571,8 @@ static __be32
 nfsd4_decode_layoutupdate4(struct nfsd4_compoundargs *argp,
                           struct nfsd4_layoutcommit *lcp)
 {
+       u32 len;
+
        if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_layout_type) < 0)
                return nfserr_bad_xdr;
        if (lcp->lc_layout_type < LAYOUT_NFSV4_1_FILES)
@@ -578,13 +580,10 @@ nfsd4_decode_layoutupdate4(struct nfsd4_compoundargs *argp,
        if (lcp->lc_layout_type >= LAYOUT_TYPE_MAX)
                return nfserr_bad_xdr;
 
-       if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_up_len) < 0)
+       if (xdr_stream_decode_u32(argp->xdr, &len) < 0)
+               return nfserr_bad_xdr;
+       if (!xdr_stream_subsegment(argp->xdr, &lcp->lc_up_layout, len))
                return nfserr_bad_xdr;
-       if (lcp->lc_up_len > 0) {
-               lcp->lc_up_layout = xdr_inline_decode(argp->xdr, lcp->lc_up_len);
-               if (!lcp->lc_up_layout)
-                       return nfserr_bad_xdr;
-       }
 
        return nfs_ok;
 }
index 925817f669176cc1ff3b7a1ddfc32429ee82745c..dfd411d1f363fd3e917f908ffe26d3aa517eeb86 100644 (file)
@@ -35,6 +35,7 @@ struct nfsd4_layout_ops {
                        const struct nfsd4_layoutget *lgp);
 
        __be32 (*proc_layoutcommit)(struct inode *inode,
+                       struct svc_rqst *rqstp,
                        struct nfsd4_layoutcommit *lcp);
 
        void (*fence_client)(struct nfs4_layout_stateid *ls,
index 83263bff27dc61ea718c4d3cf74cdd0186c6af2b..c75b295df206adaf23b95fafc45d4b3148be12f9 100644 (file)
@@ -665,8 +665,7 @@ struct nfsd4_layoutcommit {
        u64                     lc_last_wr;     /* request */
        struct timespec64       lc_mtime;       /* request */
        u32                     lc_layout_type; /* request */
-       u32                     lc_up_len;      /* layout length */
-       void                    *lc_up_layout;  /* decoded by callback */
+       struct xdr_buf          lc_up_layout;   /* decoded by callback */
        bool                    lc_size_chg;    /* response */
        u64                     lc_newsize;     /* response */
 };