5.4-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 5 Oct 2022 10:35:34 +0000 (12:35 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 5 Oct 2022 10:35:34 +0000 (12:35 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 5 Oct 2022 10:35:34 +0000 (12:35 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 5 Oct 2022 10:35:34 +0000 (12:35 +0200)
diff --git a/queue-5.4/series b/queue-5.4/series

index 0ecfa1d21908e6c97029ab031dc76614fcdbf9fd..5056b02a0e1487a5fd0c6d5bb17aac0ce3cf45c2 100644 (file)
--- a/queue-5.4/series
+++ b/queue-5.4/series
@@ -35,3 +35,14 @@ x86-speculation-disable-rrsba-behavior.patch
  x86-speculation-use-declare_per_cpu-for-x86_spec_ctrl_current.patch
  x86-bugs-warn-when-ibrs-mitigation-is-selected-on-enhanced-ibrs-parts.patch
  x86-speculation-add-rsb-vm-exit-protections.patch
+xfs-fix-misuse-of-the-xfs_attr_incomplete-flag.patch
+xfs-introduce-xfs_max_fileoff.patch
+xfs-truncate-should-remove-all-blocks-not-just-to-the-end-of-the-page-cache.patch
+xfs-fix-s_maxbytes-computation-on-32-bit-kernels.patch
+xfs-fix-iocb_nowait-handling-in-xfs_file_dio_aio_read.patch
+xfs-refactor-remote-attr-value-buffer-invalidation.patch
+xfs-fix-memory-corruption-during-remote-attr-value-buffer-invalidation.patch
+xfs-move-incore-structures-out-of-xfs_da_format.h.patch
+xfs-streamline-xfs_attr3_leaf_inactive.patch
+xfs-fix-uninitialized-variable-in-xfs_attr3_leaf_inactive.patch
+xfs-remove-unused-variable-done.patch
diff --git a/queue-5.4/xfs-fix-iocb_nowait-handling-in-xfs_file_dio_aio_read.patch b/queue-5.4/xfs-fix-iocb_nowait-handling-in-xfs_file_dio_aio_read.patch

new file mode 100644 (file)

index 0000000..98f02ce
--- /dev/null
+++ b/queue-5.4/xfs-fix-iocb_nowait-handling-in-xfs_file_dio_aio_read.patch
@@ -0,0 +1,42 @@
+From foo@baz Wed Oct  5 12:34:43 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed,  5 Oct 2022 12:30:59 +0530
+Subject: xfs: fix IOCB_NOWAIT handling in xfs_file_dio_aio_read
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221005070105.41929-6-chandan.babu@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 7b53b868a1812a9a6ab5e69249394bd37f29ce2c upstream.
+
+Direct I/O reads can also be used with RWF_NOWAIT & co.  Fix the inode
+locking in xfs_file_dio_aio_read to take IOCB_NOWAIT into account.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_file.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_file.c
++++ b/fs/xfs/xfs_file.c
+@@ -187,7 +187,12 @@ xfs_file_dio_aio_read(
+ 
+       file_accessed(iocb->ki_filp);
+ 
+-      xfs_ilock(ip, XFS_IOLOCK_SHARED);
++      if (iocb->ki_flags & IOCB_NOWAIT) {
++              if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
++                      return -EAGAIN;
++      } else {
++              xfs_ilock(ip, XFS_IOLOCK_SHARED);
++      }
+       ret = iomap_dio_rw(iocb, to, &xfs_iomap_ops, NULL);
+       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+ 
diff --git a/queue-5.4/xfs-fix-memory-corruption-during-remote-attr-value-buffer-invalidation.patch b/queue-5.4/xfs-fix-memory-corruption-during-remote-attr-value-buffer-invalidation.patch

new file mode 100644 (file)

index 0000000..b0b31e8
--- /dev/null
+++ b/queue-5.4/xfs-fix-memory-corruption-during-remote-attr-value-buffer-invalidation.patch
@@ -0,0 +1,192 @@
+From foo@baz Wed Oct  5 12:34:43 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed,  5 Oct 2022 12:31:01 +0530
+Subject: xfs: fix memory corruption during remote attr value buffer invalidation
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221005070105.41929-8-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit e8db2aafcedb7d88320ab83f1000f1606b26d4d7 upstream.
+
+[Replaced XFS_IS_CORRUPT() calls with ASSERT() for 5.4.y backport]
+
+While running generic/103, I observed what looks like memory corruption
+and (with slub debugging turned on) a slub redzone warning on i386 when
+inactivating an inode with a 64k remote attr value.
+
+On a v5 filesystem, maximally sized remote attr values require one block
+more than 64k worth of space to hold both the remote attribute value
+header (64 bytes).  On a 4k block filesystem this results in a 68k
+buffer; on a 64k block filesystem, this would be a 128k buffer.  Note
+that even though we'll never use more than 65,600 bytes of this buffer,
+XFS_MAX_BLOCKSIZE is 64k.
+
+This is a problem because the definition of struct xfs_buf_log_format
+allows for XFS_MAX_BLOCKSIZE worth of dirty bitmap (64k).  On i386 when we
+invalidate a remote attribute, xfs_trans_binval zeroes all 68k worth of
+the dirty map, writing right off the end of the log item and corrupting
+memory.  We've gotten away with this on x86_64 for years because the
+compiler inserts a u32 padding on the end of struct xfs_buf_log_format.
+
+Fortunately for us, remote attribute values are written to disk with
+xfs_bwrite(), which is to say that they are not logged.  Fix the problem
+by removing all places where we could end up creating a buffer log item
+for a remote attribute value and leave a note explaining why.  Next,
+replace the open-coded buffer invalidation with a call to the helper we
+created in the previous patch that does better checking for bad metadata
+before marking the buffer stale.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_attr_remote.c |   37 ++++++++++++++++++++++++++-----
+ fs/xfs/xfs_attr_inactive.c      |   47 +++++++++++-----------------------------
+ 2 files changed, 44 insertions(+), 40 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_attr_remote.c
++++ b/fs/xfs/libxfs/xfs_attr_remote.c
+@@ -25,6 +25,23 @@
+ #define ATTR_RMTVALUE_MAPSIZE 1       /* # of map entries at once */
+ 
+ /*
++ * Remote Attribute Values
++ * =======================
++ *
++ * Remote extended attribute values are conceptually simple -- they're written
++ * to data blocks mapped by an inode's attribute fork, and they have an upper
++ * size limit of 64k.  Setting a value does not involve the XFS log.
++ *
++ * However, on a v5 filesystem, maximally sized remote attr values require one
++ * block more than 64k worth of space to hold both the remote attribute value
++ * header (64 bytes).  On a 4k block filesystem this results in a 68k buffer;
++ * on a 64k block filesystem, this would be a 128k buffer.  Note that the log
++ * format can only handle a dirty buffer of XFS_MAX_BLOCKSIZE length (64k).
++ * Therefore, we /must/ ensure that remote attribute value buffers never touch
++ * the logging system and therefore never have a log item.
++ */
++
++/*
+  * Each contiguous block has a header, so it is not just a simple attribute
+  * length to FSB conversion.
+  */
+@@ -400,17 +417,25 @@ xfs_attr_rmtval_get(
+                              (map[i].br_startblock != HOLESTARTBLOCK));
+                       dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
+                       dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
+-                      error = xfs_trans_read_buf(mp, args->trans,
+-                                                 mp->m_ddev_targp,
+-                                                 dblkno, dblkcnt, 0, &bp,
+-                                                 &xfs_attr3_rmt_buf_ops);
+-                      if (error)
++                      bp = xfs_buf_read(mp->m_ddev_targp, dblkno, dblkcnt, 0,
++                                      &xfs_attr3_rmt_buf_ops);
++                      if (!bp)
++                              return -ENOMEM;
++                      error = bp->b_error;
++                      if (error) {
++                              xfs_buf_ioerror_alert(bp, __func__);
++                              xfs_buf_relse(bp);
++
++                              /* bad CRC means corrupted metadata */
++                              if (error == -EFSBADCRC)
++                                      error = -EFSCORRUPTED;
+                               return error;
++                      }
+ 
+                       error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
+                                                       &offset, &valuelen,
+                                                       &dst);
+-                      xfs_trans_brelse(args->trans, bp);
++                      xfs_buf_relse(bp);
+                       if (error)
+                               return error;
+ 
+--- a/fs/xfs/xfs_attr_inactive.c
++++ b/fs/xfs/xfs_attr_inactive.c
+@@ -25,22 +25,20 @@
+ #include "xfs_error.h"
+ 
+ /*
+- * Look at all the extents for this logical region,
+- * invalidate any buffers that are incore/in transactions.
++ * Invalidate any incore buffers associated with this remote attribute value
++ * extent.   We never log remote attribute value buffers, which means that they
++ * won't be attached to a transaction and are therefore safe to mark stale.
++ * The actual bunmapi will be taken care of later.
+  */
+ STATIC int
+-xfs_attr3_leaf_freextent(
+-      struct xfs_trans        **trans,
++xfs_attr3_rmt_stale(
+       struct xfs_inode        *dp,
+       xfs_dablk_t             blkno,
+       int                     blkcnt)
+ {
+       struct xfs_bmbt_irec    map;
+-      struct xfs_buf          *bp;
+       xfs_dablk_t             tblkno;
+-      xfs_daddr_t             dblkno;
+       int                     tblkcnt;
+-      int                     dblkcnt;
+       int                     nmap;
+       int                     error;
+ 
+@@ -57,35 +55,18 @@ xfs_attr3_leaf_freextent(
+               nmap = 1;
+               error = xfs_bmapi_read(dp, (xfs_fileoff_t)tblkno, tblkcnt,
+                                      &map, &nmap, XFS_BMAPI_ATTRFORK);
+-              if (error) {
++              if (error)
+                       return error;
+-              }
+               ASSERT(nmap == 1);
+-              ASSERT(map.br_startblock != DELAYSTARTBLOCK);
+ 
+               /*
+-               * If it's a hole, these are already unmapped
+-               * so there's nothing to invalidate.
++               * Mark any incore buffers for the remote value as stale.  We
++               * never log remote attr value buffers, so the buffer should be
++               * easy to kill.
+                */
+-              if (map.br_startblock != HOLESTARTBLOCK) {
+-
+-                      dblkno = XFS_FSB_TO_DADDR(dp->i_mount,
+-                                                map.br_startblock);
+-                      dblkcnt = XFS_FSB_TO_BB(dp->i_mount,
+-                                              map.br_blockcount);
+-                      bp = xfs_trans_get_buf(*trans,
+-                                      dp->i_mount->m_ddev_targp,
+-                                      dblkno, dblkcnt, 0);
+-                      if (!bp)
+-                              return -ENOMEM;
+-                      xfs_trans_binval(*trans, bp);
+-                      /*
+-                       * Roll to next transaction.
+-                       */
+-                      error = xfs_trans_roll_inode(trans, dp);
+-                      if (error)
+-                              return error;
+-              }
++              error = xfs_attr_rmtval_stale(dp, &map, 0);
++              if (error)
++                      return error;
+ 
+               tblkno += map.br_blockcount;
+               tblkcnt -= map.br_blockcount;
+@@ -174,9 +155,7 @@ xfs_attr3_leaf_inactive(
+        */
+       error = 0;
+       for (lp = list, i = 0; i < count; i++, lp++) {
+-              tmp = xfs_attr3_leaf_freextent(trans, dp,
+-                              lp->valueblk, lp->valuelen);
+-
++              tmp = xfs_attr3_rmt_stale(dp, lp->valueblk, lp->valuelen);
+               if (error == 0)
+                       error = tmp;    /* save only the 1st errno */
+       }
diff --git a/queue-5.4/xfs-fix-misuse-of-the-xfs_attr_incomplete-flag.patch b/queue-5.4/xfs-fix-misuse-of-the-xfs_attr_incomplete-flag.patch

new file mode 100644 (file)

index 0000000..2e75b1b
--- /dev/null
+++ b/queue-5.4/xfs-fix-misuse-of-the-xfs_attr_incomplete-flag.patch
@@ -0,0 +1,86 @@
+From foo@baz Wed Oct  5 12:34:42 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed,  5 Oct 2022 12:30:55 +0530
+Subject: xfs: fix misuse of the XFS_ATTR_INCOMPLETE flag
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221005070105.41929-2-chandan.babu@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 780d29057781d986cd87dbbe232cd02876ad430f upstream.
+
+XFS_ATTR_INCOMPLETE is a flag in the on-disk attribute format, and thus
+in a different namespace as the ATTR_* flags in xfs_da_args.flags.
+Switch to using a XFS_DA_OP_INCOMPLETE flag in op_flags instead.  Without
+this users might be able to inject this flag into operations using the
+attr by handle ioctl.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_attr.c      |    2 +-
+ fs/xfs/libxfs/xfs_attr_leaf.c |    4 ++--
+ fs/xfs/libxfs/xfs_da_btree.h  |    4 +++-
+ fs/xfs/libxfs/xfs_da_format.h |    2 --
+ 4 files changed, 6 insertions(+), 6 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_attr.c
++++ b/fs/xfs/libxfs/xfs_attr.c
+@@ -1007,7 +1007,7 @@ restart:
+                * The INCOMPLETE flag means that we will find the "old"
+                * attr, not the "new" one.
+                */
+-              args->flags |= XFS_ATTR_INCOMPLETE;
++              args->op_flags |= XFS_DA_OP_INCOMPLETE;
+               state = xfs_da_state_alloc();
+               state->args = args;
+               state->mp = mp;
+--- a/fs/xfs/libxfs/xfs_attr_leaf.c
++++ b/fs/xfs/libxfs/xfs_attr_leaf.c
+@@ -2345,8 +2345,8 @@ xfs_attr3_leaf_lookup_int(
+                * If we are looking for INCOMPLETE entries, show only those.
+                * If we are looking for complete entries, show only those.
+                */
+-              if ((args->flags & XFS_ATTR_INCOMPLETE) !=
+-                  (entry->flags & XFS_ATTR_INCOMPLETE)) {
++              if (!!(args->op_flags & XFS_DA_OP_INCOMPLETE) !=
++                  !!(entry->flags & XFS_ATTR_INCOMPLETE)) {
+                       continue;
+               }
+               if (entry->flags & XFS_ATTR_LOCAL) {
+--- a/fs/xfs/libxfs/xfs_da_btree.h
++++ b/fs/xfs/libxfs/xfs_da_btree.h
+@@ -82,6 +82,7 @@ typedef struct xfs_da_args {
+ #define XFS_DA_OP_OKNOENT     0x0008  /* lookup/add op, ENOENT ok, else die */
+ #define XFS_DA_OP_CILOOKUP    0x0010  /* lookup to return CI name if found */
+ #define XFS_DA_OP_ALLOCVAL    0x0020  /* lookup to alloc buffer if found  */
++#define XFS_DA_OP_INCOMPLETE  0x0040  /* lookup INCOMPLETE attr keys */
+ 
+ #define XFS_DA_OP_FLAGS \
+       { XFS_DA_OP_JUSTCHECK,  "JUSTCHECK" }, \
+@@ -89,7 +90,8 @@ typedef struct xfs_da_args {
+       { XFS_DA_OP_ADDNAME,    "ADDNAME" }, \
+       { XFS_DA_OP_OKNOENT,    "OKNOENT" }, \
+       { XFS_DA_OP_CILOOKUP,   "CILOOKUP" }, \
+-      { XFS_DA_OP_ALLOCVAL,   "ALLOCVAL" }
++      { XFS_DA_OP_ALLOCVAL,   "ALLOCVAL" }, \
++      { XFS_DA_OP_INCOMPLETE, "INCOMPLETE" }
+ 
+ /*
+  * Storage for holding state during Btree searches and split/join ops.
+--- a/fs/xfs/libxfs/xfs_da_format.h
++++ b/fs/xfs/libxfs/xfs_da_format.h
+@@ -740,8 +740,6 @@ struct xfs_attr3_icleaf_hdr {
+ 
+ /*
+  * Flags used in the leaf_entry[i].flags field.
+- * NOTE: the INCOMPLETE bit must not collide with the flags bits specified
+- * on the system call, they are "or"ed together for various operations.
+  */
+ #define       XFS_ATTR_LOCAL_BIT      0       /* attr is stored locally */
+ #define       XFS_ATTR_ROOT_BIT       1       /* limit access to trusted attrs */
diff --git a/queue-5.4/xfs-fix-s_maxbytes-computation-on-32-bit-kernels.patch b/queue-5.4/xfs-fix-s_maxbytes-computation-on-32-bit-kernels.patch

new file mode 100644 (file)

index 0000000..71b2392
--- /dev/null
+++ b/queue-5.4/xfs-fix-s_maxbytes-computation-on-32-bit-kernels.patch
@@ -0,0 +1,111 @@
+From foo@baz Wed Oct  5 12:34:43 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed,  5 Oct 2022 12:30:58 +0530
+Subject: xfs: fix s_maxbytes computation on 32-bit kernels
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221005070105.41929-5-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 932befe39ddea29cf47f4f1dc080d3dba668f0ca upstream.
+
+I observed a hang in generic/308 while running fstests on a i686 kernel.
+The hang occurred when trying to purge the pagecache on a large sparse
+file that had a page created past MAX_LFS_FILESIZE, which caused an
+integer overflow in the pagecache xarray and resulted in an infinite
+loop.
+
+I then noticed that Linus changed the definition of MAX_LFS_FILESIZE in
+commit 0cc3b0ec23ce ("Clarify (and fix) MAX_LFS_FILESIZE macros") so
+that it is now one page short of the maximum page index on 32-bit
+kernels.  Because the XFS function to compute max offset open-codes the
+2005-era MAX_LFS_FILESIZE computation and neither the vfs nor mm perform
+any sanity checking of s_maxbytes, the code in generic/308 can create a
+page above the pagecache's limit and kaboom.
+
+Fix all this by setting s_maxbytes to MAX_LFS_FILESIZE directly and
+aborting the mount with a warning if our assumptions ever break.  I have
+no answer for why this seems to have been broken for years and nobody
+noticed.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_super.c |   48 +++++++++++++++++++++---------------------------
+ 1 file changed, 21 insertions(+), 27 deletions(-)
+
+--- a/fs/xfs/xfs_super.c
++++ b/fs/xfs/xfs_super.c
+@@ -512,32 +512,6 @@ xfs_showargs(
+               seq_puts(m, ",noquota");
+ }
+ 
+-static uint64_t
+-xfs_max_file_offset(
+-      unsigned int            blockshift)
+-{
+-      unsigned int            pagefactor = 1;
+-      unsigned int            bitshift = BITS_PER_LONG - 1;
+-
+-      /* Figure out maximum filesize, on Linux this can depend on
+-       * the filesystem blocksize (on 32 bit platforms).
+-       * __block_write_begin does this in an [unsigned] long long...
+-       *      page->index << (PAGE_SHIFT - bbits)
+-       * So, for page sized blocks (4K on 32 bit platforms),
+-       * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
+-       *      (((u64)PAGE_SIZE << (BITS_PER_LONG-1))-1)
+-       * but for smaller blocksizes it is less (bbits = log2 bsize).
+-       */
+-
+-#if BITS_PER_LONG == 32
+-      ASSERT(sizeof(sector_t) == 8);
+-      pagefactor = PAGE_SIZE;
+-      bitshift = BITS_PER_LONG;
+-#endif
+-
+-      return (((uint64_t)pagefactor) << bitshift) - 1;
+-}
+-
+ /*
+  * Set parameters for inode allocation heuristics, taking into account
+  * filesystem size and inode32/inode64 mount options; i.e. specifically
+@@ -1650,6 +1624,26 @@ xfs_fs_fill_super(
+       if (error)
+               goto out_free_sb;
+ 
++      /*
++       * XFS block mappings use 54 bits to store the logical block offset.
++       * This should suffice to handle the maximum file size that the VFS
++       * supports (currently 2^63 bytes on 64-bit and ULONG_MAX << PAGE_SHIFT
++       * bytes on 32-bit), but as XFS and VFS have gotten the s_maxbytes
++       * calculation wrong on 32-bit kernels in the past, we'll add a WARN_ON
++       * to check this assertion.
++       *
++       * Avoid integer overflow by comparing the maximum bmbt offset to the
++       * maximum pagecache offset in units of fs blocks.
++       */
++      if (XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE) > XFS_MAX_FILEOFF) {
++              xfs_warn(mp,
++"MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!",
++                       XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE),
++                       XFS_MAX_FILEOFF);
++              error = -EINVAL;
++              goto out_free_sb;
++      }
++
+       error = xfs_filestream_mount(mp);
+       if (error)
+               goto out_free_sb;
+@@ -1661,7 +1655,7 @@ xfs_fs_fill_super(
+       sb->s_magic = XFS_SUPER_MAGIC;
+       sb->s_blocksize = mp->m_sb.sb_blocksize;
+       sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
+-      sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
++      sb->s_maxbytes = MAX_LFS_FILESIZE;
+       sb->s_max_links = XFS_MAXLINK;
+       sb->s_time_gran = 1;
+       sb->s_time_min = S32_MIN;
diff --git a/queue-5.4/xfs-fix-uninitialized-variable-in-xfs_attr3_leaf_inactive.patch b/queue-5.4/xfs-fix-uninitialized-variable-in-xfs_attr3_leaf_inactive.patch

new file mode 100644 (file)

index 0000000..b926056
--- /dev/null
+++ b/queue-5.4/xfs-fix-uninitialized-variable-in-xfs_attr3_leaf_inactive.patch
@@ -0,0 +1,39 @@
+From foo@baz Wed Oct  5 12:34:43 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed,  5 Oct 2022 12:31:04 +0530
+Subject: xfs: fix uninitialized variable in xfs_attr3_leaf_inactive
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221005070105.41929-11-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 54027a49938bbee1af62fad191139b14d4ee5cd2 upstream.
+
+Dan Carpenter pointed out that error is uninitialized.  While there
+never should be an attr leaf block with zero entries, let's not leave
+that logic bomb there.
+
+Fixes: 0bb9d159bd01 ("xfs: streamline xfs_attr3_leaf_inactive")
+Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Allison Collins <allison.henderson@oracle.com>
+Reviewed-by: Eric Sandeen <sandeen@redhat.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_attr_inactive.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_attr_inactive.c
++++ b/fs/xfs/xfs_attr_inactive.c
+@@ -88,7 +88,7 @@ xfs_attr3_leaf_inactive(
+       struct xfs_attr_leafblock       *leaf = bp->b_addr;
+       struct xfs_attr_leaf_entry      *entry;
+       struct xfs_attr_leaf_name_remote *name_rmt;
+-      int                             error;
++      int                             error = 0;
+       int                             i;
+ 
+       xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
diff --git a/queue-5.4/xfs-introduce-xfs_max_fileoff.patch b/queue-5.4/xfs-introduce-xfs_max_fileoff.patch

new file mode 100644 (file)

index 0000000..2c95d49
--- /dev/null
+++ b/queue-5.4/xfs-introduce-xfs_max_fileoff.patch
@@ -0,0 +1,56 @@
+From foo@baz Wed Oct  5 12:34:42 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed,  5 Oct 2022 12:30:56 +0530
+Subject: xfs: introduce XFS_MAX_FILEOFF
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221005070105.41929-3-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit a5084865524dee1fe8ea1fee17c60b4369ad4f5e upstream.
+
+Introduce a new #define for the maximum supported file block offset.
+We'll use this in the next patch to make it more obvious that we're
+doing some operation for all possible inode fork mappings after a given
+offset.  We can't use ULLONG_MAX here because bunmapi uses that to
+detect when it's done.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_format.h |    7 +++++++
+ fs/xfs/xfs_reflink.c       |    3 ++-
+ 2 files changed, 9 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_format.h
++++ b/fs/xfs/libxfs/xfs_format.h
+@@ -1540,6 +1540,13 @@ typedef struct xfs_bmdr_block {
+ #define BMBT_BLOCKCOUNT_BITLEN        21
+ 
+ #define BMBT_STARTOFF_MASK    ((1ULL << BMBT_STARTOFF_BITLEN) - 1)
++#define BMBT_BLOCKCOUNT_MASK  ((1ULL << BMBT_BLOCKCOUNT_BITLEN) - 1)
++
++/*
++ * bmbt records have a file offset (block) field that is 54 bits wide, so this
++ * is the largest xfs_fileoff_t that we ever expect to see.
++ */
++#define XFS_MAX_FILEOFF               (BMBT_STARTOFF_MASK + BMBT_BLOCKCOUNT_MASK)
+ 
+ typedef struct xfs_bmbt_rec {
+       __be64                  l0, l1;
+--- a/fs/xfs/xfs_reflink.c
++++ b/fs/xfs/xfs_reflink.c
+@@ -1544,7 +1544,8 @@ xfs_reflink_clear_inode_flag(
+        * We didn't find any shared blocks so turn off the reflink flag.
+        * First, get rid of any leftover CoW mappings.
+        */
+-      error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true);
++      error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, XFS_MAX_FILEOFF,
++                      true);
+       if (error)
+               return error;
+ 
diff --git a/queue-5.4/xfs-move-incore-structures-out-of-xfs_da_format.h.patch b/queue-5.4/xfs-move-incore-structures-out-of-xfs_da_format.h.patch

new file mode 100644 (file)

index 0000000..3651d8e
--- /dev/null
+++ b/queue-5.4/xfs-move-incore-structures-out-of-xfs_da_format.h.patch
@@ -0,0 +1,220 @@
+From foo@baz Wed Oct  5 12:34:43 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed,  5 Oct 2022 12:31:02 +0530
+Subject: xfs: move incore structures out of xfs_da_format.h
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221005070105.41929-9-chandan.babu@oracle.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit a39f089a25e75c3d17b955d8eb8bc781f23364f3 upstream.
+
+Move the abstract in-memory version of various btree block headers
+out of xfs_da_format.h as they aren't on-disk formats.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_attr_leaf.h |   23 ++++++++++++++++
+ fs/xfs/libxfs/xfs_da_btree.h  |   13 +++++++++
+ fs/xfs/libxfs/xfs_da_format.c |    1 
+ fs/xfs/libxfs/xfs_da_format.h |   57 ------------------------------------------
+ fs/xfs/libxfs/xfs_dir2.h      |    2 +
+ fs/xfs/libxfs/xfs_dir2_priv.h |   19 ++++++++++++++
+ 6 files changed, 58 insertions(+), 57 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_attr_leaf.h
++++ b/fs/xfs/libxfs/xfs_attr_leaf.h
+@@ -17,6 +17,29 @@ struct xfs_inode;
+ struct xfs_trans;
+ 
+ /*
++ * Incore version of the attribute leaf header.
++ */
++struct xfs_attr3_icleaf_hdr {
++      uint32_t        forw;
++      uint32_t        back;
++      uint16_t        magic;
++      uint16_t        count;
++      uint16_t        usedbytes;
++      /*
++       * Firstused is 32-bit here instead of 16-bit like the on-disk variant
++       * to support maximum fsb size of 64k without overflow issues throughout
++       * the attr code. Instead, the overflow condition is handled on
++       * conversion to/from disk.
++       */
++      uint32_t        firstused;
++      __u8            holes;
++      struct {
++              uint16_t        base;
++              uint16_t        size;
++      } freemap[XFS_ATTR_LEAF_MAPSIZE];
++};
++
++/*
+  * Used to keep a list of "remote value" extents when unlinking an inode.
+  */
+ typedef struct xfs_attr_inactive_list {
+--- a/fs/xfs/libxfs/xfs_da_btree.h
++++ b/fs/xfs/libxfs/xfs_da_btree.h
+@@ -127,6 +127,19 @@ typedef struct xfs_da_state {
+ } xfs_da_state_t;
+ 
+ /*
++ * In-core version of the node header to abstract the differences in the v2 and
++ * v3 disk format of the headers. Callers need to convert to/from disk format as
++ * appropriate.
++ */
++struct xfs_da3_icnode_hdr {
++      uint32_t                forw;
++      uint32_t                back;
++      uint16_t                magic;
++      uint16_t                count;
++      uint16_t                level;
++};
++
++/*
+  * Utility macros to aid in logging changed structure fields.
+  */
+ #define XFS_DA_LOGOFF(BASE, ADDR)     ((char *)(ADDR) - (char *)(BASE))
+--- a/fs/xfs/libxfs/xfs_da_format.c
++++ b/fs/xfs/libxfs/xfs_da_format.c
+@@ -13,6 +13,7 @@
+ #include "xfs_mount.h"
+ #include "xfs_inode.h"
+ #include "xfs_dir2.h"
++#include "xfs_dir2_priv.h"
+ 
+ /*
+  * Shortform directory ops
+--- a/fs/xfs/libxfs/xfs_da_format.h
++++ b/fs/xfs/libxfs/xfs_da_format.h
+@@ -94,19 +94,6 @@ struct xfs_da3_intnode {
+ };
+ 
+ /*
+- * In-core version of the node header to abstract the differences in the v2 and
+- * v3 disk format of the headers. Callers need to convert to/from disk format as
+- * appropriate.
+- */
+-struct xfs_da3_icnode_hdr {
+-      uint32_t        forw;
+-      uint32_t        back;
+-      uint16_t        magic;
+-      uint16_t        count;
+-      uint16_t        level;
+-};
+-
+-/*
+  * Directory version 2.
+  *
+  * There are 4 possible formats:
+@@ -434,14 +421,6 @@ struct xfs_dir3_leaf_hdr {
+       __be32                  pad;            /* 64 bit alignment */
+ };
+ 
+-struct xfs_dir3_icleaf_hdr {
+-      uint32_t                forw;
+-      uint32_t                back;
+-      uint16_t                magic;
+-      uint16_t                count;
+-      uint16_t                stale;
+-};
+-
+ /*
+  * Leaf block entry.
+  */
+@@ -521,19 +500,6 @@ struct xfs_dir3_free {
+ #define XFS_DIR3_FREE_CRC_OFF  offsetof(struct xfs_dir3_free, hdr.hdr.crc)
+ 
+ /*
+- * In core version of the free block header, abstracted away from on-disk format
+- * differences. Use this in the code, and convert to/from the disk version using
+- * xfs_dir3_free_hdr_from_disk/xfs_dir3_free_hdr_to_disk.
+- */
+-struct xfs_dir3_icfree_hdr {
+-      uint32_t        magic;
+-      uint32_t        firstdb;
+-      uint32_t        nvalid;
+-      uint32_t        nused;
+-
+-};
+-
+-/*
+  * Single block format.
+  *
+  * The single block format looks like the following drawing on disk:
+@@ -710,29 +676,6 @@ struct xfs_attr3_leafblock {
+ };
+ 
+ /*
+- * incore, neutral version of the attribute leaf header
+- */
+-struct xfs_attr3_icleaf_hdr {
+-      uint32_t        forw;
+-      uint32_t        back;
+-      uint16_t        magic;
+-      uint16_t        count;
+-      uint16_t        usedbytes;
+-      /*
+-       * firstused is 32-bit here instead of 16-bit like the on-disk variant
+-       * to support maximum fsb size of 64k without overflow issues throughout
+-       * the attr code. Instead, the overflow condition is handled on
+-       * conversion to/from disk.
+-       */
+-      uint32_t        firstused;
+-      __u8            holes;
+-      struct {
+-              uint16_t        base;
+-              uint16_t        size;
+-      } freemap[XFS_ATTR_LEAF_MAPSIZE];
+-};
+-
+-/*
+  * Special value to represent fs block size in the leaf header firstused field.
+  * Only used when block size overflows the 2-bytes available on disk.
+  */
+--- a/fs/xfs/libxfs/xfs_dir2.h
++++ b/fs/xfs/libxfs/xfs_dir2.h
+@@ -18,6 +18,8 @@ struct xfs_dir2_sf_entry;
+ struct xfs_dir2_data_hdr;
+ struct xfs_dir2_data_entry;
+ struct xfs_dir2_data_unused;
++struct xfs_dir3_icfree_hdr;
++struct xfs_dir3_icleaf_hdr;
+ 
+ extern struct xfs_name        xfs_name_dotdot;
+ 
+--- a/fs/xfs/libxfs/xfs_dir2_priv.h
++++ b/fs/xfs/libxfs/xfs_dir2_priv.h
+@@ -8,6 +8,25 @@
+ 
+ struct dir_context;
+ 
++/*
++ * In-core version of the leaf and free block headers to abstract the
++ * differences in the v2 and v3 disk format of the headers.
++ */
++struct xfs_dir3_icleaf_hdr {
++      uint32_t                forw;
++      uint32_t                back;
++      uint16_t                magic;
++      uint16_t                count;
++      uint16_t                stale;
++};
++
++struct xfs_dir3_icfree_hdr {
++      uint32_t                magic;
++      uint32_t                firstdb;
++      uint32_t                nvalid;
++      uint32_t                nused;
++};
++
+ /* xfs_dir2.c */
+ extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
+                               xfs_dir2_db_t *dbp);
diff --git a/queue-5.4/xfs-refactor-remote-attr-value-buffer-invalidation.patch b/queue-5.4/xfs-refactor-remote-attr-value-buffer-invalidation.patch

new file mode 100644 (file)

index 0000000..144608b
--- /dev/null
+++ b/queue-5.4/xfs-refactor-remote-attr-value-buffer-invalidation.patch
@@ -0,0 +1,116 @@
+From foo@baz Wed Oct  5 12:34:43 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed,  5 Oct 2022 12:31:00 +0530
+Subject: xfs: refactor remote attr value buffer invalidation
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221005070105.41929-7-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 8edbb26b06023de31ad7d4c9b984d99f66577929 upstream.
+
+[Replaced XFS_IS_CORRUPT() calls with ASSERT() for 5.4.y backport]
+
+Hoist the code that invalidates remote extended attribute value buffers
+into a separate helper function.  This prepares us for a memory
+corruption fix in the next patch.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_attr_remote.c |   48 ++++++++++++++++++++++++----------------
+ fs/xfs/libxfs/xfs_attr_remote.h |    2 +
+ 2 files changed, 31 insertions(+), 19 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_attr_remote.c
++++ b/fs/xfs/libxfs/xfs_attr_remote.c
+@@ -551,6 +551,32 @@ xfs_attr_rmtval_set(
+       return 0;
+ }
+ 
++/* Mark stale any incore buffers for the remote value. */
++int
++xfs_attr_rmtval_stale(
++      struct xfs_inode        *ip,
++      struct xfs_bmbt_irec    *map,
++      xfs_buf_flags_t         incore_flags)
++{
++      struct xfs_mount        *mp = ip->i_mount;
++      struct xfs_buf          *bp;
++
++      ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
++
++      ASSERT((map->br_startblock != DELAYSTARTBLOCK) &&
++             (map->br_startblock != HOLESTARTBLOCK));
++
++      bp = xfs_buf_incore(mp->m_ddev_targp,
++                      XFS_FSB_TO_DADDR(mp, map->br_startblock),
++                      XFS_FSB_TO_BB(mp, map->br_blockcount), incore_flags);
++      if (bp) {
++              xfs_buf_stale(bp);
++              xfs_buf_relse(bp);
++      }
++
++      return 0;
++}
++
+ /*
+  * Remove the value associated with an attribute by deleting the
+  * out-of-line buffer that it is stored on.
+@@ -559,7 +585,6 @@ int
+ xfs_attr_rmtval_remove(
+       struct xfs_da_args      *args)
+ {
+-      struct xfs_mount        *mp = args->dp->i_mount;
+       xfs_dablk_t             lblkno;
+       int                     blkcnt;
+       int                     error;
+@@ -574,9 +599,6 @@ xfs_attr_rmtval_remove(
+       blkcnt = args->rmtblkcnt;
+       while (blkcnt > 0) {
+               struct xfs_bmbt_irec    map;
+-              struct xfs_buf          *bp;
+-              xfs_daddr_t             dblkno;
+-              int                     dblkcnt;
+               int                     nmap;
+ 
+               /*
+@@ -588,21 +610,9 @@ xfs_attr_rmtval_remove(
+               if (error)
+                       return error;
+               ASSERT(nmap == 1);
+-              ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
+-                     (map.br_startblock != HOLESTARTBLOCK));
+-
+-              dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
+-              dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
+-
+-              /*
+-               * If the "remote" value is in the cache, remove it.
+-               */
+-              bp = xfs_buf_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK);
+-              if (bp) {
+-                      xfs_buf_stale(bp);
+-                      xfs_buf_relse(bp);
+-                      bp = NULL;
+-              }
++              error = xfs_attr_rmtval_stale(args->dp, &map, XBF_TRYLOCK);
++              if (error)
++                      return error;
+ 
+               lblkno += map.br_blockcount;
+               blkcnt -= map.br_blockcount;
+--- a/fs/xfs/libxfs/xfs_attr_remote.h
++++ b/fs/xfs/libxfs/xfs_attr_remote.h
+@@ -11,5 +11,7 @@ int xfs_attr3_rmt_blocks(struct xfs_moun
+ int xfs_attr_rmtval_get(struct xfs_da_args *args);
+ int xfs_attr_rmtval_set(struct xfs_da_args *args);
+ int xfs_attr_rmtval_remove(struct xfs_da_args *args);
++int xfs_attr_rmtval_stale(struct xfs_inode *ip, struct xfs_bmbt_irec *map,
++              xfs_buf_flags_t incore_flags);
+ 
+ #endif /* __XFS_ATTR_REMOTE_H__ */
diff --git a/queue-5.4/xfs-remove-unused-variable-done.patch b/queue-5.4/xfs-remove-unused-variable-done.patch

new file mode 100644 (file)

index 0000000..459a48f
--- /dev/null
+++ b/queue-5.4/xfs-remove-unused-variable-done.patch
@@ -0,0 +1,42 @@
+From foo@baz Wed Oct  5 12:34:43 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed,  5 Oct 2022 12:31:05 +0530
+Subject: xfs: remove unused variable 'done'
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221005070105.41929-12-chandan.babu@oracle.com>
+
+From: YueHaibing <yuehaibing@huawei.com>
+
+commit b3531f5fc16d4df2b12567bce48cd9f3ab5f9131 upstream.
+
+fs/xfs/xfs_inode.c: In function 'xfs_itruncate_extents_flags':
+fs/xfs/xfs_inode.c:1523:8: warning: unused variable 'done' [-Wunused-variable]
+
+commit 4bbb04abb4ee ("xfs: truncate should remove
+all blocks, not just to the end of the page cache")
+left behind this, so remove it.
+
+Fixes: 4bbb04abb4ee ("xfs: truncate should remove all blocks, not just to the end of the page cache")
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
+Signed-off-by: YueHaibing <yuehaibing@huawei.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_inode.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -1515,7 +1515,6 @@ xfs_itruncate_extents_flags(
+       xfs_fileoff_t           first_unmap_block;
+       xfs_filblks_t           unmap_len;
+       int                     error = 0;
+-      int                     done = 0;
+ 
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       ASSERT(!atomic_read(&VFS_I(ip)->i_count) ||
diff --git a/queue-5.4/xfs-streamline-xfs_attr3_leaf_inactive.patch b/queue-5.4/xfs-streamline-xfs_attr3_leaf_inactive.patch

new file mode 100644 (file)

index 0000000..9cc0bdf
--- /dev/null
+++ b/queue-5.4/xfs-streamline-xfs_attr3_leaf_inactive.patch
@@ -0,0 +1,195 @@
+From foo@baz Wed Oct  5 12:34:43 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed,  5 Oct 2022 12:31:03 +0530
+Subject: xfs: streamline xfs_attr3_leaf_inactive
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221005070105.41929-10-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 0bb9d159bd018b271e783d3b2d3bc82fa0727321 upstream.
+
+Now that we know we don't have to take a transaction to stale the incore
+buffers for a remote value, get rid of the unnecessary memory allocation
+in the leaf walker and call the rmt_stale function directly.  Flatten
+the loop while we're at it.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_attr_leaf.h |    9 ---
+ fs/xfs/xfs_attr_inactive.c    |  103 ++++++++++++------------------------------
+ 2 files changed, 30 insertions(+), 82 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_attr_leaf.h
++++ b/fs/xfs/libxfs/xfs_attr_leaf.h
+@@ -39,15 +39,6 @@ struct xfs_attr3_icleaf_hdr {
+       } freemap[XFS_ATTR_LEAF_MAPSIZE];
+ };
+ 
+-/*
+- * Used to keep a list of "remote value" extents when unlinking an inode.
+- */
+-typedef struct xfs_attr_inactive_list {
+-      xfs_dablk_t     valueblk;       /* block number of value bytes */
+-      int             valuelen;       /* number of bytes in value */
+-} xfs_attr_inactive_list_t;
+-
+-
+ /*========================================================================
+  * Function prototypes for the kernel.
+  *========================================================================*/
+--- a/fs/xfs/xfs_attr_inactive.c
++++ b/fs/xfs/xfs_attr_inactive.c
+@@ -37,8 +37,6 @@ xfs_attr3_rmt_stale(
+       int                     blkcnt)
+ {
+       struct xfs_bmbt_irec    map;
+-      xfs_dablk_t             tblkno;
+-      int                     tblkcnt;
+       int                     nmap;
+       int                     error;
+ 
+@@ -46,14 +44,12 @@ xfs_attr3_rmt_stale(
+        * Roll through the "value", invalidating the attribute value's
+        * blocks.
+        */
+-      tblkno = blkno;
+-      tblkcnt = blkcnt;
+-      while (tblkcnt > 0) {
++      while (blkcnt > 0) {
+               /*
+                * Try to remember where we decided to put the value.
+                */
+               nmap = 1;
+-              error = xfs_bmapi_read(dp, (xfs_fileoff_t)tblkno, tblkcnt,
++              error = xfs_bmapi_read(dp, (xfs_fileoff_t)blkno, blkcnt,
+                                      &map, &nmap, XFS_BMAPI_ATTRFORK);
+               if (error)
+                       return error;
+@@ -68,8 +64,8 @@ xfs_attr3_rmt_stale(
+               if (error)
+                       return error;
+ 
+-              tblkno += map.br_blockcount;
+-              tblkcnt -= map.br_blockcount;
++              blkno += map.br_blockcount;
++              blkcnt -= map.br_blockcount;
+       }
+ 
+       return 0;
+@@ -83,84 +79,45 @@ xfs_attr3_rmt_stale(
+  */
+ STATIC int
+ xfs_attr3_leaf_inactive(
+-      struct xfs_trans        **trans,
+-      struct xfs_inode        *dp,
+-      struct xfs_buf          *bp)
++      struct xfs_trans                **trans,
++      struct xfs_inode                *dp,
++      struct xfs_buf                  *bp)
+ {
+-      struct xfs_attr_leafblock *leaf;
+-      struct xfs_attr3_icleaf_hdr ichdr;
+-      struct xfs_attr_leaf_entry *entry;
++      struct xfs_attr3_icleaf_hdr     ichdr;
++      struct xfs_mount                *mp = bp->b_mount;
++      struct xfs_attr_leafblock       *leaf = bp->b_addr;
++      struct xfs_attr_leaf_entry      *entry;
+       struct xfs_attr_leaf_name_remote *name_rmt;
+-      struct xfs_attr_inactive_list *list;
+-      struct xfs_attr_inactive_list *lp;
+-      int                     error;
+-      int                     count;
+-      int                     size;
+-      int                     tmp;
+-      int                     i;
+-      struct xfs_mount        *mp = bp->b_mount;
++      int                             error;
++      int                             i;
+ 
+-      leaf = bp->b_addr;
+       xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
+ 
+       /*
+-       * Count the number of "remote" value extents.
++       * Find the remote value extents for this leaf and invalidate their
++       * incore buffers.
+        */
+-      count = 0;
+       entry = xfs_attr3_leaf_entryp(leaf);
+       for (i = 0; i < ichdr.count; entry++, i++) {
+-              if (be16_to_cpu(entry->nameidx) &&
+-                  ((entry->flags & XFS_ATTR_LOCAL) == 0)) {
+-                      name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
+-                      if (name_rmt->valueblk)
+-                              count++;
+-              }
+-      }
++              int             blkcnt;
+ 
+-      /*
+-       * If there are no "remote" values, we're done.
+-       */
+-      if (count == 0) {
+-              xfs_trans_brelse(*trans, bp);
+-              return 0;
+-      }
++              if (!entry->nameidx || (entry->flags & XFS_ATTR_LOCAL))
++                      continue;
+ 
+-      /*
+-       * Allocate storage for a list of all the "remote" value extents.
+-       */
+-      size = count * sizeof(xfs_attr_inactive_list_t);
+-      list = kmem_alloc(size, 0);
+-
+-      /*
+-       * Identify each of the "remote" value extents.
+-       */
+-      lp = list;
+-      entry = xfs_attr3_leaf_entryp(leaf);
+-      for (i = 0; i < ichdr.count; entry++, i++) {
+-              if (be16_to_cpu(entry->nameidx) &&
+-                  ((entry->flags & XFS_ATTR_LOCAL) == 0)) {
+-                      name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
+-                      if (name_rmt->valueblk) {
+-                              lp->valueblk = be32_to_cpu(name_rmt->valueblk);
+-                              lp->valuelen = xfs_attr3_rmt_blocks(dp->i_mount,
+-                                                  be32_to_cpu(name_rmt->valuelen));
+-                              lp++;
+-                      }
+-              }
+-      }
+-      xfs_trans_brelse(*trans, bp);   /* unlock for trans. in freextent() */
+-
+-      /*
+-       * Invalidate each of the "remote" value extents.
+-       */
+-      error = 0;
+-      for (lp = list, i = 0; i < count; i++, lp++) {
+-              tmp = xfs_attr3_rmt_stale(dp, lp->valueblk, lp->valuelen);
+-              if (error == 0)
+-                      error = tmp;    /* save only the 1st errno */
++              name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
++              if (!name_rmt->valueblk)
++                      continue;
++
++              blkcnt = xfs_attr3_rmt_blocks(dp->i_mount,
++                              be32_to_cpu(name_rmt->valuelen));
++              error = xfs_attr3_rmt_stale(dp,
++                              be32_to_cpu(name_rmt->valueblk), blkcnt);
++              if (error)
++                      goto err;
+       }
+ 
+-      kmem_free(list);
++      xfs_trans_brelse(*trans, bp);
++err:
+       return error;
+ }
+ 
diff --git a/queue-5.4/xfs-truncate-should-remove-all-blocks-not-just-to-the-end-of-the-page-cache.patch b/queue-5.4/xfs-truncate-should-remove-all-blocks-not-just-to-the-end-of-the-page-cache.patch

new file mode 100644 (file)

index 0000000..c9e4351
--- /dev/null
+++ b/queue-5.4/xfs-truncate-should-remove-all-blocks-not-just-to-the-end-of-the-page-cache.patch
@@ -0,0 +1,83 @@
+From foo@baz Wed Oct  5 12:34:42 PM CEST 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Wed,  5 Oct 2022 12:30:57 +0530
+Subject: xfs: truncate should remove all blocks, not just to the end of the page cache
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221005070105.41929-4-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 4bbb04abb4ee2e1f7d65e52557ba1c4038ea43ed upstream.
+
+xfs_itruncate_extents_flags() is supposed to unmap every block in a file
+from EOF onwards.  Oddly, it uses s_maxbytes as the upper limit to the
+bunmapi range, even though s_maxbytes reflects the highest offset the
+pagecache can support, not the highest offset that XFS supports.
+
+The result of this confusion is that if you create a 20T file on a
+64-bit machine, mount the filesystem on a 32-bit machine, and remove the
+file, we leak everything above 16T.  Fix this by capping the bunmapi
+request at the maximum possible block offset, not s_maxbytes.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_inode.c |   24 ++++++++++++------------
+ 1 file changed, 12 insertions(+), 12 deletions(-)
+
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -1513,7 +1513,6 @@ xfs_itruncate_extents_flags(
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_trans        *tp = *tpp;
+       xfs_fileoff_t           first_unmap_block;
+-      xfs_fileoff_t           last_block;
+       xfs_filblks_t           unmap_len;
+       int                     error = 0;
+       int                     done = 0;
+@@ -1536,21 +1535,22 @@ xfs_itruncate_extents_flags(
+        * the end of the file (in a crash where the space is allocated
+        * but the inode size is not yet updated), simply remove any
+        * blocks which show up between the new EOF and the maximum
+-       * possible file size.  If the first block to be removed is
+-       * beyond the maximum file size (ie it is the same as last_block),
+-       * then there is nothing to do.
++       * possible file size.
++       *
++       * We have to free all the blocks to the bmbt maximum offset, even if
++       * the page cache can't scale that far.
+        */
+       first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
+-      last_block = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
+-      if (first_unmap_block == last_block)
++      if (first_unmap_block >= XFS_MAX_FILEOFF) {
++              WARN_ON_ONCE(first_unmap_block > XFS_MAX_FILEOFF);
+               return 0;
++      }
+ 
+-      ASSERT(first_unmap_block < last_block);
+-      unmap_len = last_block - first_unmap_block + 1;
+-      while (!done) {
++      unmap_len = XFS_MAX_FILEOFF - first_unmap_block + 1;
++      while (unmap_len > 0) {
+               ASSERT(tp->t_firstblock == NULLFSBLOCK);
+-              error = xfs_bunmapi(tp, ip, first_unmap_block, unmap_len, flags,
+-                                  XFS_ITRUNC_MAX_EXTENTS, &done);
++              error = __xfs_bunmapi(tp, ip, first_unmap_block, &unmap_len,
++                              flags, XFS_ITRUNC_MAX_EXTENTS);
+               if (error)
+                       goto out;
+ 
+@@ -1570,7 +1570,7 @@ xfs_itruncate_extents_flags(
+       if (whichfork == XFS_DATA_FORK) {
+               /* Remove all pending CoW reservations. */
+               error = xfs_reflink_cancel_cow_blocks(ip, &tp,
+-                              first_unmap_block, last_block, true);
++                              first_unmap_block, XFS_MAX_FILEOFF, true);
+               if (error)
+                       goto out;
+
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 5 Oct 2022 10:35:34 +0000 (12:35 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 5 Oct 2022 10:35:34 +0000 (12:35 +0200)
queue-5.4/series		patch \| blob \| blame \| history
queue-5.4/xfs-fix-iocb_nowait-handling-in-xfs_file_dio_aio_read.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/xfs-fix-memory-corruption-during-remote-attr-value-buffer-invalidation.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/xfs-fix-misuse-of-the-xfs_attr_incomplete-flag.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/xfs-fix-s_maxbytes-computation-on-32-bit-kernels.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/xfs-fix-uninitialized-variable-in-xfs_attr3_leaf_inactive.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/xfs-introduce-xfs_max_fileoff.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/xfs-move-incore-structures-out-of-xfs_da_format.h.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/xfs-refactor-remote-attr-value-buffer-invalidation.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/xfs-remove-unused-variable-done.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/xfs-streamline-xfs_attr3_leaf_inactive.patch	[new file with mode: 0644]	patch \| blob
queue-5.4/xfs-truncate-should-remove-all-blocks-not-just-to-the-end-of-the-page-cache.patch	[new file with mode: 0644]	patch \| blob