From 44e24d1524c34b54937d837dc5283e2a5cb1886a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 7 Nov 2022 09:52:36 +0100 Subject: [PATCH] 5.4-stable patches added patches: xfs-add-the-missed-xfs_perag_put-for-xfs_ifree_cluster.patch xfs-don-t-fail-unwritten-extent-conversion-on-writeback-due-to-edquot.patch xfs-don-t-fail-verifier-on-empty-attr3-leaf-block.patch xfs-group-quota-should-return-edquot-when-prj-quota-enabled.patch xfs-gut-error-handling-in-xfs_trans_unreserve_and_mod_sb.patch xfs-use-ordered-buffers-to-initialize-dquot-buffers-during-quotacheck.patch --- queue-5.4/series | 6 + ...-xfs_perag_put-for-xfs_ifree_cluster.patch | 41 +++ ...onversion-on-writeback-due-to-edquot.patch | 43 ++++ ...l-verifier-on-empty-attr3-leaf-block.patch | 68 +++++ ...return-edquot-when-prj-quota-enabled.patch | 48 ++++ ...ng-in-xfs_trans_unreserve_and_mod_sb.patch | 234 ++++++++++++++++++ ...lize-dquot-buffers-during-quotacheck.patch | 206 +++++++++++++++ 7 files changed, 646 insertions(+) create mode 100644 queue-5.4/xfs-add-the-missed-xfs_perag_put-for-xfs_ifree_cluster.patch create mode 100644 queue-5.4/xfs-don-t-fail-unwritten-extent-conversion-on-writeback-due-to-edquot.patch create mode 100644 queue-5.4/xfs-don-t-fail-verifier-on-empty-attr3-leaf-block.patch create mode 100644 queue-5.4/xfs-group-quota-should-return-edquot-when-prj-quota-enabled.patch create mode 100644 queue-5.4/xfs-gut-error-handling-in-xfs_trans_unreserve_and_mod_sb.patch create mode 100644 queue-5.4/xfs-use-ordered-buffers-to-initialize-dquot-buffers-during-quotacheck.patch diff --git a/queue-5.4/series b/queue-5.4/series index 046cceb483e..b41f8545768 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -34,3 +34,9 @@ media-meson-vdec-fix-possible-refcount-leak-in-vdec_.patch scsi-core-restrict-legal-sdev_state-transitions-via-.patch hid-saitek-add-madcatz-variant-of-mmo7-mouse-device-.patch i2c-xiic-add-platform-module-alias.patch +xfs-don-t-fail-verifier-on-empty-attr3-leaf-block.patch +xfs-use-ordered-buffers-to-initialize-dquot-buffers-during-quotacheck.patch +xfs-gut-error-handling-in-xfs_trans_unreserve_and_mod_sb.patch +xfs-group-quota-should-return-edquot-when-prj-quota-enabled.patch +xfs-don-t-fail-unwritten-extent-conversion-on-writeback-due-to-edquot.patch +xfs-add-the-missed-xfs_perag_put-for-xfs_ifree_cluster.patch diff --git a/queue-5.4/xfs-add-the-missed-xfs_perag_put-for-xfs_ifree_cluster.patch b/queue-5.4/xfs-add-the-missed-xfs_perag_put-for-xfs_ifree_cluster.patch new file mode 100644 index 00000000000..22beff9d9e0 --- /dev/null +++ b/queue-5.4/xfs-add-the-missed-xfs_perag_put-for-xfs_ifree_cluster.patch @@ -0,0 +1,41 @@ +From foo@baz Mon Nov 7 09:49:06 AM CET 2022 +From: Chandan Babu R +Date: Mon, 7 Nov 2022 09:33:27 +0530 +Subject: xfs: Add the missed xfs_perag_put() for xfs_ifree_cluster() +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221107040327.132719-7-chandan.babu@oracle.com> + +From: Chuhong Yuan + +commit 8cc0072469723459dc6bd7beff81b2b3149f4cf4 upstream. + +xfs_ifree_cluster() calls xfs_perag_get() at the beginning, but forgets to +call xfs_perag_put() in one failed path. +Add the missed function call to fix it. + +Fixes: ce92464c180b ("xfs: make xfs_trans_get_buf return an error code") +Signed-off-by: Chuhong Yuan +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_inode.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/fs/xfs/xfs_inode.c ++++ b/fs/xfs/xfs_inode.c +@@ -2592,8 +2592,10 @@ xfs_ifree_cluster( + mp->m_bsize * igeo->blocks_per_cluster, + XBF_UNMAPPED); + +- if (!bp) ++ if (!bp) { ++ xfs_perag_put(pag); + return -ENOMEM; ++ } + + /* + * This buffer may not have been correctly initialised as we diff --git a/queue-5.4/xfs-don-t-fail-unwritten-extent-conversion-on-writeback-due-to-edquot.patch b/queue-5.4/xfs-don-t-fail-unwritten-extent-conversion-on-writeback-due-to-edquot.patch new file mode 100644 index 00000000000..f024440f747 --- /dev/null +++ b/queue-5.4/xfs-don-t-fail-unwritten-extent-conversion-on-writeback-due-to-edquot.patch @@ -0,0 +1,43 @@ +From foo@baz Mon Nov 7 09:49:06 AM CET 2022 +From: Chandan Babu R +Date: Mon, 7 Nov 2022 09:33:26 +0530 +Subject: xfs: don't fail unwritten extent conversion on writeback due to edquot +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221107040327.132719-6-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit 1edd2c055dff9710b1e29d4df01902abb0a55f1f upstream. + +During writeback, it's possible for the quota block reservation in +xfs_iomap_write_unwritten to fail with EDQUOT because we hit the quota +limit. This causes writeback errors for data that was already written +to disk, when it's not even guaranteed that the bmbt will expand to +exceed the quota limit. Irritatingly, this condition is reported to +userspace as EIO by fsync, which is confusing. + +We wrote the data, so allow the reservation. That might put us slightly +above the hard limit, but it's better than losing data after a write. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Brian Foster +Reviewed-by: Christoph Hellwig +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_iomap.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/xfs/xfs_iomap.c ++++ b/fs/xfs/xfs_iomap.c +@@ -789,7 +789,7 @@ xfs_iomap_write_unwritten( + xfs_trans_ijoin(tp, ip, 0); + + error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0, +- XFS_QMOPT_RES_REGBLKS); ++ XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES); + if (error) + goto error_on_bmapi_transaction; + diff --git a/queue-5.4/xfs-don-t-fail-verifier-on-empty-attr3-leaf-block.patch b/queue-5.4/xfs-don-t-fail-verifier-on-empty-attr3-leaf-block.patch new file mode 100644 index 00000000000..8a1265745e4 --- /dev/null +++ b/queue-5.4/xfs-don-t-fail-verifier-on-empty-attr3-leaf-block.patch @@ -0,0 +1,68 @@ +From foo@baz Mon Nov 7 09:49:06 AM CET 2022 +From: Chandan Babu R +Date: Mon, 7 Nov 2022 09:33:22 +0530 +Subject: xfs: don't fail verifier on empty attr3 leaf block +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221107040327.132719-2-chandan.babu@oracle.com> + +From: Brian Foster + +commit f28cef9e4daca11337cb9f144cdebedaab69d78c upstream. + +The attr fork can transition from shortform to leaf format while +empty if the first xattr doesn't fit in shortform. While this empty +leaf block state is intended to be transient, it is technically not +due to the transactional implementation of the xattr set operation. + +We historically have a couple of bandaids to work around this +problem. The first is to hold the buffer after the format conversion +to prevent premature writeback of the empty leaf buffer and the +second is to bypass the xattr count check in the verifier during +recovery. The latter assumes that the xattr set is also in the log +and will be recovered into the buffer soon after the empty leaf +buffer is reconstructed. This is not guaranteed, however. + +If the filesystem crashes after the format conversion but before the +xattr set that induced it, only the format conversion may exist in +the log. When recovered, this creates a latent corrupted state on +the inode as any subsequent attempts to read the buffer fail due to +verifier failure. This includes further attempts to set xattrs on +the inode or attempts to destroy the attr fork, which prevents the +inode from ever being removed from the unlinked list. + +To avoid this condition, accept that an empty attr leaf block is a +valid state and remove the count check from the verifier. This means +that on rare occasions an attr fork might exist in an unexpected +state, but is otherwise consistent and functional. Note that we +retain the logic to avoid racing with metadata writeback to reduce +the window where this can occur. + +Signed-off-by: Brian Foster +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_attr_leaf.c | 8 -------- + 1 file changed, 8 deletions(-) + +--- a/fs/xfs/libxfs/xfs_attr_leaf.c ++++ b/fs/xfs/libxfs/xfs_attr_leaf.c +@@ -251,14 +251,6 @@ xfs_attr3_leaf_verify( + return fa; + + /* +- * In recovery there is a transient state where count == 0 is valid +- * because we may have transitioned an empty shortform attr to a leaf +- * if the attr didn't fit in shortform. +- */ +- if (!xfs_log_in_recovery(mp) && ichdr.count == 0) +- return __this_address; +- +- /* + * firstused is the block offset of the first name info structure. + * Make sure it doesn't go off the block or crash into the header. + */ diff --git a/queue-5.4/xfs-group-quota-should-return-edquot-when-prj-quota-enabled.patch b/queue-5.4/xfs-group-quota-should-return-edquot-when-prj-quota-enabled.patch new file mode 100644 index 00000000000..9d04aa089b1 --- /dev/null +++ b/queue-5.4/xfs-group-quota-should-return-edquot-when-prj-quota-enabled.patch @@ -0,0 +1,48 @@ +From foo@baz Mon Nov 7 09:49:06 AM CET 2022 +From: Chandan Babu R +Date: Mon, 7 Nov 2022 09:33:25 +0530 +Subject: xfs: group quota should return EDQUOT when prj quota enabled +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221107040327.132719-5-chandan.babu@oracle.com> + +From: Eric Sandeen + +commit c8d329f311c4d3d8f8e6dc5897ec235e37f48ae8 upstream. + +Long ago, group & project quota were mutually exclusive, and so +when we turned on XFS_QMOPT_ENOSPC ("return ENOSPC if project quota +is exceeded") when project quota was enabled, we only needed to +disable it again for user quota. + +When group & project quota got separated, this got missed, and as a +result if project quota is enabled and group quota is exceeded, the +error code returned is incorrectly returned as ENOSPC not EDQUOT. + +Fix this by stripping XFS_QMOPT_ENOSPC out of flags for group +quota when we try to reserve the space. + +Signed-off-by: Eric Sandeen +Reviewed-by: Christoph Hellwig +Reviewed-by: Brian Foster +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_trans_dquot.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/fs/xfs/xfs_trans_dquot.c ++++ b/fs/xfs/xfs_trans_dquot.c +@@ -756,7 +756,8 @@ xfs_trans_reserve_quota_bydquots( + } + + if (gdqp) { +- error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags); ++ error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, ++ (flags & ~XFS_QMOPT_ENOSPC)); + if (error) + goto unwind_usr; + } diff --git a/queue-5.4/xfs-gut-error-handling-in-xfs_trans_unreserve_and_mod_sb.patch b/queue-5.4/xfs-gut-error-handling-in-xfs_trans_unreserve_and_mod_sb.patch new file mode 100644 index 00000000000..1a60cc8959e --- /dev/null +++ b/queue-5.4/xfs-gut-error-handling-in-xfs_trans_unreserve_and_mod_sb.patch @@ -0,0 +1,234 @@ +From foo@baz Mon Nov 7 09:49:06 AM CET 2022 +From: Chandan Babu R +Date: Mon, 7 Nov 2022 09:33:24 +0530 +Subject: xfs: gut error handling in xfs_trans_unreserve_and_mod_sb() +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221107040327.132719-4-chandan.babu@oracle.com> + +From: Dave Chinner + +commit dc3ffbb14060c943469d5e12900db3a60bc3fa64 upstream. + +The error handling in xfs_trans_unreserve_and_mod_sb() is largely +incorrect - rolling back the changes in the transaction if only one +counter underruns makes all the other counters incorrect. We still +allow the change to proceed and committing the transaction, except +now we have multiple incorrect counters instead of a single +underflow. + +Further, we don't actually report the error to the caller, so this +is completely silent except on debug kernels that will assert on +failure before we even get to the rollback code. Hence this error +handling is broken, untested, and largely unnecessary complexity. + +Just remove it. + +Signed-off-by: Dave Chinner +Reviewed-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/xfs_trans.c | 163 ++++++----------------------------------------------- + 1 file changed, 20 insertions(+), 143 deletions(-) + +--- a/fs/xfs/xfs_trans.c ++++ b/fs/xfs/xfs_trans.c +@@ -532,57 +532,9 @@ xfs_trans_apply_sb_deltas( + sizeof(sbp->sb_frextents) - 1); + } + +-STATIC int +-xfs_sb_mod8( +- uint8_t *field, +- int8_t delta) +-{ +- int8_t counter = *field; +- +- counter += delta; +- if (counter < 0) { +- ASSERT(0); +- return -EINVAL; +- } +- *field = counter; +- return 0; +-} +- +-STATIC int +-xfs_sb_mod32( +- uint32_t *field, +- int32_t delta) +-{ +- int32_t counter = *field; +- +- counter += delta; +- if (counter < 0) { +- ASSERT(0); +- return -EINVAL; +- } +- *field = counter; +- return 0; +-} +- +-STATIC int +-xfs_sb_mod64( +- uint64_t *field, +- int64_t delta) +-{ +- int64_t counter = *field; +- +- counter += delta; +- if (counter < 0) { +- ASSERT(0); +- return -EINVAL; +- } +- *field = counter; +- return 0; +-} +- + /* +- * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations +- * and apply superblock counter changes to the in-core superblock. The ++ * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations and ++ * apply superblock counter changes to the in-core superblock. The + * t_res_fdblocks_delta and t_res_frextents_delta fields are explicitly NOT + * applied to the in-core superblock. The idea is that that has already been + * done. +@@ -627,20 +579,17 @@ xfs_trans_unreserve_and_mod_sb( + /* apply the per-cpu counters */ + if (blkdelta) { + error = xfs_mod_fdblocks(mp, blkdelta, rsvd); +- if (error) +- goto out; ++ ASSERT(!error); + } + + if (idelta) { + error = xfs_mod_icount(mp, idelta); +- if (error) +- goto out_undo_fdblocks; ++ ASSERT(!error); + } + + if (ifreedelta) { + error = xfs_mod_ifree(mp, ifreedelta); +- if (error) +- goto out_undo_icount; ++ ASSERT(!error); + } + + if (rtxdelta == 0 && !(tp->t_flags & XFS_TRANS_SB_DIRTY)) +@@ -648,95 +597,23 @@ xfs_trans_unreserve_and_mod_sb( + + /* apply remaining deltas */ + spin_lock(&mp->m_sb_lock); +- if (rtxdelta) { +- error = xfs_sb_mod64(&mp->m_sb.sb_frextents, rtxdelta); +- if (error) +- goto out_undo_ifree; +- } +- +- if (tp->t_dblocks_delta != 0) { +- error = xfs_sb_mod64(&mp->m_sb.sb_dblocks, tp->t_dblocks_delta); +- if (error) +- goto out_undo_frextents; +- } +- if (tp->t_agcount_delta != 0) { +- error = xfs_sb_mod32(&mp->m_sb.sb_agcount, tp->t_agcount_delta); +- if (error) +- goto out_undo_dblocks; +- } +- if (tp->t_imaxpct_delta != 0) { +- error = xfs_sb_mod8(&mp->m_sb.sb_imax_pct, tp->t_imaxpct_delta); +- if (error) +- goto out_undo_agcount; +- } +- if (tp->t_rextsize_delta != 0) { +- error = xfs_sb_mod32(&mp->m_sb.sb_rextsize, +- tp->t_rextsize_delta); +- if (error) +- goto out_undo_imaxpct; +- } +- if (tp->t_rbmblocks_delta != 0) { +- error = xfs_sb_mod32(&mp->m_sb.sb_rbmblocks, +- tp->t_rbmblocks_delta); +- if (error) +- goto out_undo_rextsize; +- } +- if (tp->t_rblocks_delta != 0) { +- error = xfs_sb_mod64(&mp->m_sb.sb_rblocks, tp->t_rblocks_delta); +- if (error) +- goto out_undo_rbmblocks; +- } +- if (tp->t_rextents_delta != 0) { +- error = xfs_sb_mod64(&mp->m_sb.sb_rextents, +- tp->t_rextents_delta); +- if (error) +- goto out_undo_rblocks; +- } +- if (tp->t_rextslog_delta != 0) { +- error = xfs_sb_mod8(&mp->m_sb.sb_rextslog, +- tp->t_rextslog_delta); +- if (error) +- goto out_undo_rextents; +- } ++ mp->m_sb.sb_frextents += rtxdelta; ++ mp->m_sb.sb_dblocks += tp->t_dblocks_delta; ++ mp->m_sb.sb_agcount += tp->t_agcount_delta; ++ mp->m_sb.sb_imax_pct += tp->t_imaxpct_delta; ++ mp->m_sb.sb_rextsize += tp->t_rextsize_delta; ++ mp->m_sb.sb_rbmblocks += tp->t_rbmblocks_delta; ++ mp->m_sb.sb_rblocks += tp->t_rblocks_delta; ++ mp->m_sb.sb_rextents += tp->t_rextents_delta; ++ mp->m_sb.sb_rextslog += tp->t_rextslog_delta; + spin_unlock(&mp->m_sb_lock); +- return; + +-out_undo_rextents: +- if (tp->t_rextents_delta) +- xfs_sb_mod64(&mp->m_sb.sb_rextents, -tp->t_rextents_delta); +-out_undo_rblocks: +- if (tp->t_rblocks_delta) +- xfs_sb_mod64(&mp->m_sb.sb_rblocks, -tp->t_rblocks_delta); +-out_undo_rbmblocks: +- if (tp->t_rbmblocks_delta) +- xfs_sb_mod32(&mp->m_sb.sb_rbmblocks, -tp->t_rbmblocks_delta); +-out_undo_rextsize: +- if (tp->t_rextsize_delta) +- xfs_sb_mod32(&mp->m_sb.sb_rextsize, -tp->t_rextsize_delta); +-out_undo_imaxpct: +- if (tp->t_rextsize_delta) +- xfs_sb_mod8(&mp->m_sb.sb_imax_pct, -tp->t_imaxpct_delta); +-out_undo_agcount: +- if (tp->t_agcount_delta) +- xfs_sb_mod32(&mp->m_sb.sb_agcount, -tp->t_agcount_delta); +-out_undo_dblocks: +- if (tp->t_dblocks_delta) +- xfs_sb_mod64(&mp->m_sb.sb_dblocks, -tp->t_dblocks_delta); +-out_undo_frextents: +- if (rtxdelta) +- xfs_sb_mod64(&mp->m_sb.sb_frextents, -rtxdelta); +-out_undo_ifree: +- spin_unlock(&mp->m_sb_lock); +- if (ifreedelta) +- xfs_mod_ifree(mp, -ifreedelta); +-out_undo_icount: +- if (idelta) +- xfs_mod_icount(mp, -idelta); +-out_undo_fdblocks: +- if (blkdelta) +- xfs_mod_fdblocks(mp, -blkdelta, rsvd); +-out: +- ASSERT(error == 0); ++ /* ++ * Debug checks outside of the spinlock so they don't lock up the ++ * machine if they fail. ++ */ ++ ASSERT(mp->m_sb.sb_imax_pct >= 0); ++ ASSERT(mp->m_sb.sb_rextslog >= 0); + return; + } + diff --git a/queue-5.4/xfs-use-ordered-buffers-to-initialize-dquot-buffers-during-quotacheck.patch b/queue-5.4/xfs-use-ordered-buffers-to-initialize-dquot-buffers-during-quotacheck.patch new file mode 100644 index 00000000000..2e82a9682cc --- /dev/null +++ b/queue-5.4/xfs-use-ordered-buffers-to-initialize-dquot-buffers-during-quotacheck.patch @@ -0,0 +1,206 @@ +From foo@baz Mon Nov 7 09:49:06 AM CET 2022 +From: Chandan Babu R +Date: Mon, 7 Nov 2022 09:33:23 +0530 +Subject: xfs: use ordered buffers to initialize dquot buffers during quotacheck +To: gregkh@linuxfoundation.org +Cc: sashal@kernel.org, mcgrof@kernel.org, linux-xfs@vger.kernel.org, stable@vger.kernel.org, djwong@kernel.org, chandan.babu@oracle.com, amir73il@gmail.com, leah.rumancik@gmail.com +Message-ID: <20221107040327.132719-3-chandan.babu@oracle.com> + +From: "Darrick J. Wong" + +commit 78bba5c812cc651cee51b64b786be926ab7fe2a9 upstream. + +While QAing the new xfs_repair quotacheck code, I uncovered a quota +corruption bug resulting from a bad interaction between dquot buffer +initialization and quotacheck. The bug can be reproduced with the +following sequence: + +# mkfs.xfs -f /dev/sdf +# mount /dev/sdf /opt -o usrquota +# su nobody -s /bin/bash -c 'touch /opt/barf' +# sync +# xfs_quota -x -c 'report -ahi' /opt +User quota on /opt (/dev/sdf) + Inodes +User ID Used Soft Hard Warn/Grace +---------- --------------------------------- +root 3 0 0 00 [------] +nobody 1 0 0 00 [------] + +# xfs_io -x -c 'shutdown' /opt +# umount /opt +# mount /dev/sdf /opt -o usrquota +# touch /opt/man2 +# xfs_quota -x -c 'report -ahi' /opt +User quota on /opt (/dev/sdf) + Inodes +User ID Used Soft Hard Warn/Grace +---------- --------------------------------- +root 1 0 0 00 [------] +nobody 1 0 0 00 [------] + +# umount /opt + +Notice how the initial quotacheck set the root dquot icount to 3 +(rootino, rbmino, rsumino), but after shutdown -> remount -> recovery, +xfs_quota reports that the root dquot has only 1 icount. We haven't +deleted anything from the filesystem, which means that quota is now +under-counting. This behavior is not limited to icount or the root +dquot, but this is the shortest reproducer. + +I traced the cause of this discrepancy to the way that we handle ondisk +dquot updates during quotacheck vs. regular fs activity. Normally, when +we allocate a disk block for a dquot, we log the buffer as a regular +(dquot) buffer. Subsequent updates to the dquots backed by that block +are done via separate dquot log item updates, which means that they +depend on the logged buffer update being written to disk before the +dquot items. Because individual dquots have their own LSN fields, that +initial dquot buffer must always be recovered. + +However, the story changes for quotacheck, which can cause dquot block +allocations but persists the final dquot counter values via a delwri +list. Because recovery doesn't gate dquot buffer replay on an LSN, this +means that the initial dquot buffer can be replayed over the (newer) +contents that were delwritten at the end of quotacheck. In effect, this +re-initializes the dquot counters after they've been updated. If the +log does not contain any other dquot items to recover, the obsolete +dquot contents will not be corrected by log recovery. + +Because quotacheck uses a transaction to log the setting of the CHKD +flags in the superblock, we skip quotacheck during the second mount +call, which allows the incorrect icount to remain. + +Fix this by changing the ondisk dquot initialization function to use +ordered buffers to write out fresh dquot blocks if it detects that we're +running quotacheck. If the system goes down before quotacheck can +complete, the CHKD flags will not be set in the superblock and the next +mount will run quotacheck again, which can fix uninitialized dquot +buffers. This requires amending the defer code to maintaine ordered +buffer state across defer rolls for the sake of the dquot allocation +code. + +For regular operations we preserve the current behavior since the dquot +items require properly initialized ondisk dquot records. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Brian Foster +Reviewed-by: Christoph Hellwig +Acked-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Greg Kroah-Hartman +--- + fs/xfs/libxfs/xfs_defer.c | 10 +++++++- + fs/xfs/xfs_dquot.c | 56 +++++++++++++++++++++++++++++++++++----------- + 2 files changed, 52 insertions(+), 14 deletions(-) + +--- a/fs/xfs/libxfs/xfs_defer.c ++++ b/fs/xfs/libxfs/xfs_defer.c +@@ -234,10 +234,13 @@ xfs_defer_trans_roll( + struct xfs_log_item *lip; + struct xfs_buf *bplist[XFS_DEFER_OPS_NR_BUFS]; + struct xfs_inode *iplist[XFS_DEFER_OPS_NR_INODES]; ++ unsigned int ordered = 0; /* bitmap */ + int bpcount = 0, ipcount = 0; + int i; + int error; + ++ BUILD_BUG_ON(NBBY * sizeof(ordered) < XFS_DEFER_OPS_NR_BUFS); ++ + list_for_each_entry(lip, &tp->t_items, li_trans) { + switch (lip->li_type) { + case XFS_LI_BUF: +@@ -248,7 +251,10 @@ xfs_defer_trans_roll( + ASSERT(0); + return -EFSCORRUPTED; + } +- xfs_trans_dirty_buf(tp, bli->bli_buf); ++ if (bli->bli_flags & XFS_BLI_ORDERED) ++ ordered |= (1U << bpcount); ++ else ++ xfs_trans_dirty_buf(tp, bli->bli_buf); + bplist[bpcount++] = bli->bli_buf; + } + break; +@@ -289,6 +295,8 @@ xfs_defer_trans_roll( + /* Rejoin the buffers and dirty them so the log moves forward. */ + for (i = 0; i < bpcount; i++) { + xfs_trans_bjoin(tp, bplist[i]); ++ if (ordered & (1U << i)) ++ xfs_trans_ordered_buf(tp, bplist[i]); + xfs_trans_bhold(tp, bplist[i]); + } + +--- a/fs/xfs/xfs_dquot.c ++++ b/fs/xfs/xfs_dquot.c +@@ -205,16 +205,18 @@ xfs_qm_adjust_dqtimers( + */ + STATIC void + xfs_qm_init_dquot_blk( +- xfs_trans_t *tp, +- xfs_mount_t *mp, +- xfs_dqid_t id, +- uint type, +- xfs_buf_t *bp) ++ struct xfs_trans *tp, ++ struct xfs_mount *mp, ++ xfs_dqid_t id, ++ uint type, ++ struct xfs_buf *bp) + { + struct xfs_quotainfo *q = mp->m_quotainfo; +- xfs_dqblk_t *d; +- xfs_dqid_t curid; +- int i; ++ struct xfs_dqblk *d; ++ xfs_dqid_t curid; ++ unsigned int qflag; ++ unsigned int blftype; ++ int i; + + ASSERT(tp); + ASSERT(xfs_buf_islocked(bp)); +@@ -238,11 +240,39 @@ xfs_qm_init_dquot_blk( + } + } + +- xfs_trans_dquot_buf(tp, bp, +- (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF : +- ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF : +- XFS_BLF_GDQUOT_BUF))); +- xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1); ++ if (type & XFS_DQ_USER) { ++ qflag = XFS_UQUOTA_CHKD; ++ blftype = XFS_BLF_UDQUOT_BUF; ++ } else if (type & XFS_DQ_PROJ) { ++ qflag = XFS_PQUOTA_CHKD; ++ blftype = XFS_BLF_PDQUOT_BUF; ++ } else { ++ qflag = XFS_GQUOTA_CHKD; ++ blftype = XFS_BLF_GDQUOT_BUF; ++ } ++ ++ xfs_trans_dquot_buf(tp, bp, blftype); ++ ++ /* ++ * quotacheck uses delayed writes to update all the dquots on disk in an ++ * efficient manner instead of logging the individual dquot changes as ++ * they are made. However if we log the buffer allocated here and crash ++ * after quotacheck while the logged initialisation is still in the ++ * active region of the log, log recovery can replay the dquot buffer ++ * initialisation over the top of the checked dquots and corrupt quota ++ * accounting. ++ * ++ * To avoid this problem, quotacheck cannot log the initialised buffer. ++ * We must still dirty the buffer and write it back before the ++ * allocation transaction clears the log. Therefore, mark the buffer as ++ * ordered instead of logging it directly. This is safe for quotacheck ++ * because it detects and repairs allocated but initialized dquot blocks ++ * in the quota inodes. ++ */ ++ if (!(mp->m_qflags & qflag)) ++ xfs_trans_ordered_buf(tp, bp); ++ else ++ xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1); + } + + /* -- 2.47.3