git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.4-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 7 Nov 2022 08:52:36 +0000 (09:52 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 7 Nov 2022 08:52:36 +0000 (09:52 +0100)
added patches:
xfs-add-the-missed-xfs_perag_put-for-xfs_ifree_cluster.patch
xfs-don-t-fail-unwritten-extent-conversion-on-writeback-due-to-edquot.patch
xfs-don-t-fail-verifier-on-empty-attr3-leaf-block.patch
xfs-group-quota-should-return-edquot-when-prj-quota-enabled.patch
xfs-gut-error-handling-in-xfs_trans_unreserve_and_mod_sb.patch
xfs-use-ordered-buffers-to-initialize-dquot-buffers-during-quotacheck.patch

queue-5.4/series
queue-5.4/xfs-add-the-missed-xfs_perag_put-for-xfs_ifree_cluster.patch [new file with mode: 0644]
queue-5.4/xfs-don-t-fail-unwritten-extent-conversion-on-writeback-due-to-edquot.patch [new file with mode: 0644]
queue-5.4/xfs-don-t-fail-verifier-on-empty-attr3-leaf-block.patch [new file with mode: 0644]
queue-5.4/xfs-group-quota-should-return-edquot-when-prj-quota-enabled.patch [new file with mode: 0644]
queue-5.4/xfs-gut-error-handling-in-xfs_trans_unreserve_and_mod_sb.patch [new file with mode: 0644]
queue-5.4/xfs-use-ordered-buffers-to-initialize-dquot-buffers-during-quotacheck.patch [new file with mode: 0644]

index 046cceb483e72fcdba6073358eae25a225f0c7c1..b41f85457683992a819b599d0e8c0d9222cba791 100644 (file)
@@ -34,3 +34,9 @@ media-meson-vdec-fix-possible-refcount-leak-in-vdec_.patch
 scsi-core-restrict-legal-sdev_state-transitions-via-.patch
 hid-saitek-add-madcatz-variant-of-mmo7-mouse-device-.patch
 i2c-xiic-add-platform-module-alias.patch
+xfs-don-t-fail-verifier-on-empty-attr3-leaf-block.patch
+xfs-use-ordered-buffers-to-initialize-dquot-buffers-during-quotacheck.patch
+xfs-gut-error-handling-in-xfs_trans_unreserve_and_mod_sb.patch
+xfs-group-quota-should-return-edquot-when-prj-quota-enabled.patch
+xfs-don-t-fail-unwritten-extent-conversion-on-writeback-due-to-edquot.patch
+xfs-add-the-missed-xfs_perag_put-for-xfs_ifree_cluster.patch
diff --git a/queue-5.4/xfs-add-the-missed-xfs_perag_put-for-xfs_ifree_cluster.patch b/queue-5.4/xfs-add-the-missed-xfs_perag_put-for-xfs_ifree_cluster.patch
new file mode 100644 (file)
index 0000000..22beff9
--- /dev/null
@@ -0,0 +1,41 @@
+From foo@baz Mon Nov  7 09:49:06 AM CET 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Mon,  7 Nov 2022 09:33:27 +0530
+Subject: xfs: Add the missed xfs_perag_put() for xfs_ifree_cluster()
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221107040327.132719-7-chandan.babu@oracle.com>
+
+From: Chuhong Yuan <hslester96@gmail.com>
+
+commit 8cc0072469723459dc6bd7beff81b2b3149f4cf4 upstream.
+
+xfs_ifree_cluster() calls xfs_perag_get() at the beginning, but forgets to
+call xfs_perag_put() in one failed path.
+Add the missed function call to fix it.
+
+Fixes: ce92464c180b ("xfs: make xfs_trans_get_buf return an error code")
+Signed-off-by: Chuhong Yuan <hslester96@gmail.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_inode.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -2592,8 +2592,10 @@ xfs_ifree_cluster(
+                                       mp->m_bsize * igeo->blocks_per_cluster,
+                                       XBF_UNMAPPED);
+-              if (!bp)
++              if (!bp) {
++                      xfs_perag_put(pag);
+                       return -ENOMEM;
++              }
+               /*
+                * This buffer may not have been correctly initialised as we
diff --git a/queue-5.4/xfs-don-t-fail-unwritten-extent-conversion-on-writeback-due-to-edquot.patch b/queue-5.4/xfs-don-t-fail-unwritten-extent-conversion-on-writeback-due-to-edquot.patch
new file mode 100644 (file)
index 0000000..f024440
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Mon Nov  7 09:49:06 AM CET 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Mon,  7 Nov 2022 09:33:26 +0530
+Subject: xfs: don't fail unwritten extent conversion on writeback due to edquot
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221107040327.132719-6-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 1edd2c055dff9710b1e29d4df01902abb0a55f1f upstream.
+
+During writeback, it's possible for the quota block reservation in
+xfs_iomap_write_unwritten to fail with EDQUOT because we hit the quota
+limit.  This causes writeback errors for data that was already written
+to disk, when it's not even guaranteed that the bmbt will expand to
+exceed the quota limit.  Irritatingly, this condition is reported to
+userspace as EIO by fsync, which is confusing.
+
+We wrote the data, so allow the reservation.  That might put us slightly
+above the hard limit, but it's better than losing data after a write.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_iomap.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_iomap.c
++++ b/fs/xfs/xfs_iomap.c
+@@ -789,7 +789,7 @@ xfs_iomap_write_unwritten(
+               xfs_trans_ijoin(tp, ip, 0);
+               error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0,
+-                              XFS_QMOPT_RES_REGBLKS);
++                              XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES);
+               if (error)
+                       goto error_on_bmapi_transaction;
diff --git a/queue-5.4/xfs-don-t-fail-verifier-on-empty-attr3-leaf-block.patch b/queue-5.4/xfs-don-t-fail-verifier-on-empty-attr3-leaf-block.patch
new file mode 100644 (file)
index 0000000..8a12657
--- /dev/null
@@ -0,0 +1,68 @@
+From foo@baz Mon Nov  7 09:49:06 AM CET 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Mon,  7 Nov 2022 09:33:22 +0530
+Subject: xfs: don't fail verifier on empty attr3 leaf block
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221107040327.132719-2-chandan.babu@oracle.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit f28cef9e4daca11337cb9f144cdebedaab69d78c upstream.
+
+The attr fork can transition from shortform to leaf format while
+empty if the first xattr doesn't fit in shortform. While this empty
+leaf block state is intended to be transient, it is technically not
+due to the transactional implementation of the xattr set operation.
+
+We historically have a couple of bandaids to work around this
+problem. The first is to hold the buffer after the format conversion
+to prevent premature writeback of the empty leaf buffer and the
+second is to bypass the xattr count check in the verifier during
+recovery. The latter assumes that the xattr set is also in the log
+and will be recovered into the buffer soon after the empty leaf
+buffer is reconstructed. This is not guaranteed, however.
+
+If the filesystem crashes after the format conversion but before the
+xattr set that induced it, only the format conversion may exist in
+the log. When recovered, this creates a latent corrupted state on
+the inode as any subsequent attempts to read the buffer fail due to
+verifier failure. This includes further attempts to set xattrs on
+the inode or attempts to destroy the attr fork, which prevents the
+inode from ever being removed from the unlinked list.
+
+To avoid this condition, accept that an empty attr leaf block is a
+valid state and remove the count check from the verifier. This means
+that on rare occasions an attr fork might exist in an unexpected
+state, but is otherwise consistent and functional. Note that we
+retain the logic to avoid racing with metadata writeback to reduce
+the window where this can occur.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_attr_leaf.c |    8 --------
+ 1 file changed, 8 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_attr_leaf.c
++++ b/fs/xfs/libxfs/xfs_attr_leaf.c
+@@ -251,14 +251,6 @@ xfs_attr3_leaf_verify(
+               return fa;
+       /*
+-       * In recovery there is a transient state where count == 0 is valid
+-       * because we may have transitioned an empty shortform attr to a leaf
+-       * if the attr didn't fit in shortform.
+-       */
+-      if (!xfs_log_in_recovery(mp) && ichdr.count == 0)
+-              return __this_address;
+-
+-      /*
+        * firstused is the block offset of the first name info structure.
+        * Make sure it doesn't go off the block or crash into the header.
+        */
diff --git a/queue-5.4/xfs-group-quota-should-return-edquot-when-prj-quota-enabled.patch b/queue-5.4/xfs-group-quota-should-return-edquot-when-prj-quota-enabled.patch
new file mode 100644 (file)
index 0000000..9d04aa0
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Mon Nov  7 09:49:06 AM CET 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Mon,  7 Nov 2022 09:33:25 +0530
+Subject: xfs: group quota should return EDQUOT when prj quota enabled
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221107040327.132719-5-chandan.babu@oracle.com>
+
+From: Eric Sandeen <sandeen@redhat.com>
+
+commit c8d329f311c4d3d8f8e6dc5897ec235e37f48ae8 upstream.
+
+Long ago, group & project quota were mutually exclusive, and so
+when we turned on XFS_QMOPT_ENOSPC ("return ENOSPC if project quota
+is exceeded") when project quota was enabled, we only needed to
+disable it again for user quota.
+
+When group & project quota got separated, this got missed, and as a
+result if project quota is enabled and group quota is exceeded, the
+error code returned is incorrectly returned as ENOSPC not EDQUOT.
+
+Fix this by stripping XFS_QMOPT_ENOSPC out of flags for group
+quota when we try to reserve the space.
+
+Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_trans_dquot.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/xfs_trans_dquot.c
++++ b/fs/xfs/xfs_trans_dquot.c
+@@ -756,7 +756,8 @@ xfs_trans_reserve_quota_bydquots(
+       }
+       if (gdqp) {
+-              error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags);
++              error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos,
++                                      (flags & ~XFS_QMOPT_ENOSPC));
+               if (error)
+                       goto unwind_usr;
+       }
diff --git a/queue-5.4/xfs-gut-error-handling-in-xfs_trans_unreserve_and_mod_sb.patch b/queue-5.4/xfs-gut-error-handling-in-xfs_trans_unreserve_and_mod_sb.patch
new file mode 100644 (file)
index 0000000..1a60cc8
--- /dev/null
@@ -0,0 +1,234 @@
+From foo@baz Mon Nov  7 09:49:06 AM CET 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Mon,  7 Nov 2022 09:33:24 +0530
+Subject: xfs: gut error handling in xfs_trans_unreserve_and_mod_sb()
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221107040327.132719-4-chandan.babu@oracle.com>
+
+From: Dave Chinner <david@fromorbit.com>
+
+commit dc3ffbb14060c943469d5e12900db3a60bc3fa64 upstream.
+
+The error handling in xfs_trans_unreserve_and_mod_sb() is largely
+incorrect - rolling back the changes in the transaction if only one
+counter underruns makes all the other counters incorrect. We still
+allow the change to proceed and committing the transaction, except
+now we have multiple incorrect counters instead of a single
+underflow.
+
+Further, we don't actually report the error to the caller, so this
+is completely silent except on debug kernels that will assert on
+failure before we even get to the rollback code.  Hence this error
+handling is broken, untested, and largely unnecessary complexity.
+
+Just remove it.
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_trans.c |  163 ++++++-----------------------------------------------
+ 1 file changed, 20 insertions(+), 143 deletions(-)
+
+--- a/fs/xfs/xfs_trans.c
++++ b/fs/xfs/xfs_trans.c
+@@ -532,57 +532,9 @@ xfs_trans_apply_sb_deltas(
+                                 sizeof(sbp->sb_frextents) - 1);
+ }
+-STATIC int
+-xfs_sb_mod8(
+-      uint8_t                 *field,
+-      int8_t                  delta)
+-{
+-      int8_t                  counter = *field;
+-
+-      counter += delta;
+-      if (counter < 0) {
+-              ASSERT(0);
+-              return -EINVAL;
+-      }
+-      *field = counter;
+-      return 0;
+-}
+-
+-STATIC int
+-xfs_sb_mod32(
+-      uint32_t                *field,
+-      int32_t                 delta)
+-{
+-      int32_t                 counter = *field;
+-
+-      counter += delta;
+-      if (counter < 0) {
+-              ASSERT(0);
+-              return -EINVAL;
+-      }
+-      *field = counter;
+-      return 0;
+-}
+-
+-STATIC int
+-xfs_sb_mod64(
+-      uint64_t                *field,
+-      int64_t                 delta)
+-{
+-      int64_t                 counter = *field;
+-
+-      counter += delta;
+-      if (counter < 0) {
+-              ASSERT(0);
+-              return -EINVAL;
+-      }
+-      *field = counter;
+-      return 0;
+-}
+-
+ /*
+- * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations
+- * and apply superblock counter changes to the in-core superblock.  The
++ * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations and
++ * apply superblock counter changes to the in-core superblock.  The
+  * t_res_fdblocks_delta and t_res_frextents_delta fields are explicitly NOT
+  * applied to the in-core superblock.  The idea is that that has already been
+  * done.
+@@ -627,20 +579,17 @@ xfs_trans_unreserve_and_mod_sb(
+       /* apply the per-cpu counters */
+       if (blkdelta) {
+               error = xfs_mod_fdblocks(mp, blkdelta, rsvd);
+-              if (error)
+-                      goto out;
++              ASSERT(!error);
+       }
+       if (idelta) {
+               error = xfs_mod_icount(mp, idelta);
+-              if (error)
+-                      goto out_undo_fdblocks;
++              ASSERT(!error);
+       }
+       if (ifreedelta) {
+               error = xfs_mod_ifree(mp, ifreedelta);
+-              if (error)
+-                      goto out_undo_icount;
++              ASSERT(!error);
+       }
+       if (rtxdelta == 0 && !(tp->t_flags & XFS_TRANS_SB_DIRTY))
+@@ -648,95 +597,23 @@ xfs_trans_unreserve_and_mod_sb(
+       /* apply remaining deltas */
+       spin_lock(&mp->m_sb_lock);
+-      if (rtxdelta) {
+-              error = xfs_sb_mod64(&mp->m_sb.sb_frextents, rtxdelta);
+-              if (error)
+-                      goto out_undo_ifree;
+-      }
+-
+-      if (tp->t_dblocks_delta != 0) {
+-              error = xfs_sb_mod64(&mp->m_sb.sb_dblocks, tp->t_dblocks_delta);
+-              if (error)
+-                      goto out_undo_frextents;
+-      }
+-      if (tp->t_agcount_delta != 0) {
+-              error = xfs_sb_mod32(&mp->m_sb.sb_agcount, tp->t_agcount_delta);
+-              if (error)
+-                      goto out_undo_dblocks;
+-      }
+-      if (tp->t_imaxpct_delta != 0) {
+-              error = xfs_sb_mod8(&mp->m_sb.sb_imax_pct, tp->t_imaxpct_delta);
+-              if (error)
+-                      goto out_undo_agcount;
+-      }
+-      if (tp->t_rextsize_delta != 0) {
+-              error = xfs_sb_mod32(&mp->m_sb.sb_rextsize,
+-                                   tp->t_rextsize_delta);
+-              if (error)
+-                      goto out_undo_imaxpct;
+-      }
+-      if (tp->t_rbmblocks_delta != 0) {
+-              error = xfs_sb_mod32(&mp->m_sb.sb_rbmblocks,
+-                                   tp->t_rbmblocks_delta);
+-              if (error)
+-                      goto out_undo_rextsize;
+-      }
+-      if (tp->t_rblocks_delta != 0) {
+-              error = xfs_sb_mod64(&mp->m_sb.sb_rblocks, tp->t_rblocks_delta);
+-              if (error)
+-                      goto out_undo_rbmblocks;
+-      }
+-      if (tp->t_rextents_delta != 0) {
+-              error = xfs_sb_mod64(&mp->m_sb.sb_rextents,
+-                                   tp->t_rextents_delta);
+-              if (error)
+-                      goto out_undo_rblocks;
+-      }
+-      if (tp->t_rextslog_delta != 0) {
+-              error = xfs_sb_mod8(&mp->m_sb.sb_rextslog,
+-                                   tp->t_rextslog_delta);
+-              if (error)
+-                      goto out_undo_rextents;
+-      }
++      mp->m_sb.sb_frextents += rtxdelta;
++      mp->m_sb.sb_dblocks += tp->t_dblocks_delta;
++      mp->m_sb.sb_agcount += tp->t_agcount_delta;
++      mp->m_sb.sb_imax_pct += tp->t_imaxpct_delta;
++      mp->m_sb.sb_rextsize += tp->t_rextsize_delta;
++      mp->m_sb.sb_rbmblocks += tp->t_rbmblocks_delta;
++      mp->m_sb.sb_rblocks += tp->t_rblocks_delta;
++      mp->m_sb.sb_rextents += tp->t_rextents_delta;
++      mp->m_sb.sb_rextslog += tp->t_rextslog_delta;
+       spin_unlock(&mp->m_sb_lock);
+-      return;
+-out_undo_rextents:
+-      if (tp->t_rextents_delta)
+-              xfs_sb_mod64(&mp->m_sb.sb_rextents, -tp->t_rextents_delta);
+-out_undo_rblocks:
+-      if (tp->t_rblocks_delta)
+-              xfs_sb_mod64(&mp->m_sb.sb_rblocks, -tp->t_rblocks_delta);
+-out_undo_rbmblocks:
+-      if (tp->t_rbmblocks_delta)
+-              xfs_sb_mod32(&mp->m_sb.sb_rbmblocks, -tp->t_rbmblocks_delta);
+-out_undo_rextsize:
+-      if (tp->t_rextsize_delta)
+-              xfs_sb_mod32(&mp->m_sb.sb_rextsize, -tp->t_rextsize_delta);
+-out_undo_imaxpct:
+-      if (tp->t_rextsize_delta)
+-              xfs_sb_mod8(&mp->m_sb.sb_imax_pct, -tp->t_imaxpct_delta);
+-out_undo_agcount:
+-      if (tp->t_agcount_delta)
+-              xfs_sb_mod32(&mp->m_sb.sb_agcount, -tp->t_agcount_delta);
+-out_undo_dblocks:
+-      if (tp->t_dblocks_delta)
+-              xfs_sb_mod64(&mp->m_sb.sb_dblocks, -tp->t_dblocks_delta);
+-out_undo_frextents:
+-      if (rtxdelta)
+-              xfs_sb_mod64(&mp->m_sb.sb_frextents, -rtxdelta);
+-out_undo_ifree:
+-      spin_unlock(&mp->m_sb_lock);
+-      if (ifreedelta)
+-              xfs_mod_ifree(mp, -ifreedelta);
+-out_undo_icount:
+-      if (idelta)
+-              xfs_mod_icount(mp, -idelta);
+-out_undo_fdblocks:
+-      if (blkdelta)
+-              xfs_mod_fdblocks(mp, -blkdelta, rsvd);
+-out:
+-      ASSERT(error == 0);
++      /*
++       * Debug checks outside of the spinlock so they don't lock up the
++       * machine if they fail.
++       */
++      ASSERT(mp->m_sb.sb_imax_pct >= 0);
++      ASSERT(mp->m_sb.sb_rextslog >= 0);
+       return;
+ }
diff --git a/queue-5.4/xfs-use-ordered-buffers-to-initialize-dquot-buffers-during-quotacheck.patch b/queue-5.4/xfs-use-ordered-buffers-to-initialize-dquot-buffers-during-quotacheck.patch
new file mode 100644 (file)
index 0000000..2e82a96
--- /dev/null
@@ -0,0 +1,206 @@
+From foo@baz Mon Nov  7 09:49:06 AM CET 2022
+From: Chandan Babu R <chandan.babu@oracle.com>
+Date: Mon,  7 Nov 2022 09:33:23 +0530
+Subject: xfs: use ordered buffers to initialize dquot buffers during quotacheck
+To: gregkh@linuxfoundation.org
+Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com
+Message-ID: <20221107040327.132719-3-chandan.babu@oracle.com>
+
+From: "Darrick J. Wong" <darrick.wong@oracle.com>
+
+commit 78bba5c812cc651cee51b64b786be926ab7fe2a9 upstream.
+
+While QAing the new xfs_repair quotacheck code, I uncovered a quota
+corruption bug resulting from a bad interaction between dquot buffer
+initialization and quotacheck.  The bug can be reproduced with the
+following sequence:
+
+# mkfs.xfs -f /dev/sdf
+# mount /dev/sdf /opt -o usrquota
+# su nobody -s /bin/bash -c 'touch /opt/barf'
+# sync
+# xfs_quota -x -c 'report -ahi' /opt
+User quota on /opt (/dev/sdf)
+                        Inodes
+User ID      Used   Soft   Hard Warn/Grace
+---------- ---------------------------------
+root            3      0      0  00 [------]
+nobody          1      0      0  00 [------]
+
+# xfs_io -x -c 'shutdown' /opt
+# umount /opt
+# mount /dev/sdf /opt -o usrquota
+# touch /opt/man2
+# xfs_quota -x -c 'report -ahi' /opt
+User quota on /opt (/dev/sdf)
+                        Inodes
+User ID      Used   Soft   Hard Warn/Grace
+---------- ---------------------------------
+root            1      0      0  00 [------]
+nobody          1      0      0  00 [------]
+
+# umount /opt
+
+Notice how the initial quotacheck set the root dquot icount to 3
+(rootino, rbmino, rsumino), but after shutdown -> remount -> recovery,
+xfs_quota reports that the root dquot has only 1 icount.  We haven't
+deleted anything from the filesystem, which means that quota is now
+under-counting.  This behavior is not limited to icount or the root
+dquot, but this is the shortest reproducer.
+
+I traced the cause of this discrepancy to the way that we handle ondisk
+dquot updates during quotacheck vs. regular fs activity.  Normally, when
+we allocate a disk block for a dquot, we log the buffer as a regular
+(dquot) buffer.  Subsequent updates to the dquots backed by that block
+are done via separate dquot log item updates, which means that they
+depend on the logged buffer update being written to disk before the
+dquot items.  Because individual dquots have their own LSN fields, that
+initial dquot buffer must always be recovered.
+
+However, the story changes for quotacheck, which can cause dquot block
+allocations but persists the final dquot counter values via a delwri
+list.  Because recovery doesn't gate dquot buffer replay on an LSN, this
+means that the initial dquot buffer can be replayed over the (newer)
+contents that were delwritten at the end of quotacheck.  In effect, this
+re-initializes the dquot counters after they've been updated.  If the
+log does not contain any other dquot items to recover, the obsolete
+dquot contents will not be corrected by log recovery.
+
+Because quotacheck uses a transaction to log the setting of the CHKD
+flags in the superblock, we skip quotacheck during the second mount
+call, which allows the incorrect icount to remain.
+
+Fix this by changing the ondisk dquot initialization function to use
+ordered buffers to write out fresh dquot blocks if it detects that we're
+running quotacheck.  If the system goes down before quotacheck can
+complete, the CHKD flags will not be set in the superblock and the next
+mount will run quotacheck again, which can fix uninitialized dquot
+buffers.  This requires amending the defer code to maintain ordered
+buffer state across defer rolls for the sake of the dquot allocation
+code.
+
+For regular operations we preserve the current behavior since the dquot
+items require properly initialized ondisk dquot records.
+
+Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_defer.c |   10 +++++++-
+ fs/xfs/xfs_dquot.c        |   56 +++++++++++++++++++++++++++++++++++-----------
+ 2 files changed, 52 insertions(+), 14 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_defer.c
++++ b/fs/xfs/libxfs/xfs_defer.c
+@@ -234,10 +234,13 @@ xfs_defer_trans_roll(
+       struct xfs_log_item             *lip;
+       struct xfs_buf                  *bplist[XFS_DEFER_OPS_NR_BUFS];
+       struct xfs_inode                *iplist[XFS_DEFER_OPS_NR_INODES];
++      unsigned int                    ordered = 0; /* bitmap */
+       int                             bpcount = 0, ipcount = 0;
+       int                             i;
+       int                             error;
++      BUILD_BUG_ON(NBBY * sizeof(ordered) < XFS_DEFER_OPS_NR_BUFS);
++
+       list_for_each_entry(lip, &tp->t_items, li_trans) {
+               switch (lip->li_type) {
+               case XFS_LI_BUF:
+@@ -248,7 +251,10 @@ xfs_defer_trans_roll(
+                                       ASSERT(0);
+                                       return -EFSCORRUPTED;
+                               }
+-                              xfs_trans_dirty_buf(tp, bli->bli_buf);
++                              if (bli->bli_flags & XFS_BLI_ORDERED)
++                                      ordered |= (1U << bpcount);
++                              else
++                                      xfs_trans_dirty_buf(tp, bli->bli_buf);
+                               bplist[bpcount++] = bli->bli_buf;
+                       }
+                       break;
+@@ -289,6 +295,8 @@ xfs_defer_trans_roll(
+       /* Rejoin the buffers and dirty them so the log moves forward. */
+       for (i = 0; i < bpcount; i++) {
+               xfs_trans_bjoin(tp, bplist[i]);
++              if (ordered & (1U << i))
++                      xfs_trans_ordered_buf(tp, bplist[i]);
+               xfs_trans_bhold(tp, bplist[i]);
+       }
+--- a/fs/xfs/xfs_dquot.c
++++ b/fs/xfs/xfs_dquot.c
+@@ -205,16 +205,18 @@ xfs_qm_adjust_dqtimers(
+  */
+ STATIC void
+ xfs_qm_init_dquot_blk(
+-      xfs_trans_t     *tp,
+-      xfs_mount_t     *mp,
+-      xfs_dqid_t      id,
+-      uint            type,
+-      xfs_buf_t       *bp)
++      struct xfs_trans        *tp,
++      struct xfs_mount        *mp,
++      xfs_dqid_t              id,
++      uint                    type,
++      struct xfs_buf          *bp)
+ {
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+-      xfs_dqblk_t     *d;
+-      xfs_dqid_t      curid;
+-      int             i;
++      struct xfs_dqblk        *d;
++      xfs_dqid_t              curid;
++      unsigned int            qflag;
++      unsigned int            blftype;
++      int                     i;
+       ASSERT(tp);
+       ASSERT(xfs_buf_islocked(bp));
+@@ -238,11 +240,39 @@ xfs_qm_init_dquot_blk(
+               }
+       }
+-      xfs_trans_dquot_buf(tp, bp,
+-                          (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
+-                          ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
+-                           XFS_BLF_GDQUOT_BUF)));
+-      xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
++      if (type & XFS_DQ_USER) {
++              qflag = XFS_UQUOTA_CHKD;
++              blftype = XFS_BLF_UDQUOT_BUF;
++      } else if (type & XFS_DQ_PROJ) {
++              qflag = XFS_PQUOTA_CHKD;
++              blftype = XFS_BLF_PDQUOT_BUF;
++      } else {
++              qflag = XFS_GQUOTA_CHKD;
++              blftype = XFS_BLF_GDQUOT_BUF;
++      }
++
++      xfs_trans_dquot_buf(tp, bp, blftype);
++
++      /*
++       * quotacheck uses delayed writes to update all the dquots on disk in an
++       * efficient manner instead of logging the individual dquot changes as
++       * they are made. However if we log the buffer allocated here and crash
++       * after quotacheck while the logged initialisation is still in the
++       * active region of the log, log recovery can replay the dquot buffer
++       * initialisation over the top of the checked dquots and corrupt quota
++       * accounting.
++       *
++       * To avoid this problem, quotacheck cannot log the initialised buffer.
++       * We must still dirty the buffer and write it back before the
++       * allocation transaction clears the log. Therefore, mark the buffer as
++       * ordered instead of logging it directly. This is safe for quotacheck
++       * because it detects and repairs allocated but initialized dquot blocks
++       * in the quota inodes.
++       */
++      if (!(mp->m_qflags & qflag))
++              xfs_trans_ordered_buf(tp, bp);
++      else
++              xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
+ }
+ /*