--- /dev/null
+From 70132763d5d2e94cd185e3aa92ac6a3ba89068fa Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@linaro.org>
+Date: Thu, 26 May 2022 10:23:14 -0500
+Subject: net: ipa: fix page free in ipa_endpoint_replenish_one()
+
+From: Alex Elder <elder@linaro.org>
+
+commit 70132763d5d2e94cd185e3aa92ac6a3ba89068fa upstream.
+
+Currently the (possibly compound) pages used for receive buffers are
+freed using __free_pages(). But according to this comment above the
+definition of that function, that's wrong:
+ If you want to use the page's reference count to decide
+ when to free the allocation, you should allocate a compound
+ page, and use put_page() instead of __free_pages().
+
+Convert the call to __free_pages() in ipa_endpoint_replenish_one()
+to use put_page() instead.
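+
+As an illustrative sketch (not part of the upstream change): the buffer
+is allocated as a compound page, so its lifetime is governed by the page
+reference count and the last reference must be dropped with put_page().
+Assuming dev_alloc_pages() is the allocator used by this function (it
+passes __GFP_COMP for order > 0), the intended pairing is roughly:
+
+	/* allocation: returns a compound page for order > 0 */
+	page = dev_alloc_pages(get_order(IPA_RX_BUFFER_SIZE));
+
+	/* error unwind: drop the reference rather than __free_pages() */
+	if (page)
+		put_page(page);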
+
+Fixes: 6a606b90153b8 ("net: ipa: allocate transaction in replenish loop")
+Signed-off-by: Alex Elder <elder@linaro.org>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ipa/ipa_endpoint.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ipa/ipa_endpoint.c
++++ b/drivers/net/ipa/ipa_endpoint.c
+@@ -884,7 +884,7 @@ static int ipa_endpoint_replenish_one(st
+ err_trans_free:
+ gsi_trans_free(trans);
+ err_free_pages:
+- __free_pages(page, get_order(IPA_RX_BUFFER_SIZE));
++ put_page(page);
+
+ return -ENOMEM;
+ }
--- /dev/null
+From 155c0c90bca918de6e4327275dfc1d97fd604115 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@linaro.org>
+Date: Thu, 26 May 2022 10:23:13 -0500
+Subject: net: ipa: fix page free in ipa_endpoint_trans_release()
+
+From: Alex Elder <elder@linaro.org>
+
+commit 155c0c90bca918de6e4327275dfc1d97fd604115 upstream.
+
+Currently the (possibly compound) pages used for receive buffers are
+freed using __free_pages(). But according to this comment above the
+definition of that function, that's wrong:
+ If you want to use the page's reference count to decide when
+ to free the allocation, you should allocate a compound page,
+ and use put_page() instead of __free_pages().
+
+Convert the call to __free_pages() in ipa_endpoint_trans_release()
+to use put_page() instead.
+
+Fixes: ed23f02680caa ("net: ipa: define per-endpoint receive buffer size")
+Signed-off-by: Alex Elder <elder@linaro.org>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ipa/ipa_endpoint.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ipa/ipa_endpoint.c
++++ b/drivers/net/ipa/ipa_endpoint.c
+@@ -1179,7 +1179,7 @@ void ipa_endpoint_trans_release(struct i
+ struct page *page = trans->data;
+
+ if (page)
+- __free_pages(page, get_order(IPA_RX_BUFFER_SIZE));
++ put_page(page);
+ }
+ }
+
blk-iolatency-fix-inflight-count-imbalances-and-io-hangs-on-offline.patch
coresight-core-fix-coresight-device-probe-failure-issue.patch
phy-qcom-qmp-fix-reset-controller-leak-on-probe-errors.patch
+net-ipa-fix-page-free-in-ipa_endpoint_trans_release.patch
+net-ipa-fix-page-free-in-ipa_endpoint_replenish_one.patch
+xfs-set-inode-size-after-creating-symlink.patch
+xfs-sync-lazy-sb-accounting-on-quiesce-of-read-only-mounts.patch
+xfs-fix-chown-leaking-delalloc-quota-blocks-when-fssetxattr-fails.patch
+xfs-fix-incorrect-root-dquot-corruption-error-when-switching-group-project-quota-types.patch
+xfs-restore-shutdown-check-in-mapped-write-fault-path.patch
+xfs-force-log-and-push-ail-to-clear-pinned-inodes-when-aborting-mount.patch
+xfs-consider-shutdown-in-bmapbt-cursor-delete-assert.patch
+xfs-assert-in-xfs_btree_del_cursor-should-take-into-account-error.patch
--- /dev/null
+From foo@baz Mon Jun 6 07:00:47 PM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon, 6 Jun 2022 17:32:55 +0300
+Subject: xfs: assert in xfs_btree_del_cursor should take into account error
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, Dave Chinner <david@fromorbit.com>, "Darrick J . Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Brian Foster <bfoster@redhat.com>, Christian Brauner <brauner@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Adam Manzanares <a.manzanares@samsung.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org, Dave Chinner <dchinner@redhat.com>
+Message-ID: <20220606143255.685988-9-amir73il@gmail.com>
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 56486f307100e8fc66efa2ebd8a71941fa10bf6f upstream.
+
+xfs/538 on a 1kB block filesystem failed with this assert:
+
+XFS: Assertion failed: cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 || xfs_is_shutdown(cur->bc_mp), file: fs/xfs/libxfs/xfs_btree.c, line: 448
+
+The problem was that an allocation failed unexpectedly in
+xfs_bmbt_alloc_block() after roughly 150,000 minlen allocation error
+injections, resulting in an EFSCORRUPTED error being returned to
+xfs_bmapi_write(). The error occurred on extent-to-btree format
+conversion allocating the new root block:
+
+ RIP: 0010:xfs_bmbt_alloc_block+0x177/0x210
+ Call Trace:
+ <TASK>
+ xfs_btree_new_iroot+0xdf/0x520
+ xfs_btree_make_block_unfull+0x10d/0x1c0
+ xfs_btree_insrec+0x364/0x790
+ xfs_btree_insert+0xaa/0x210
+ xfs_bmap_add_extent_hole_real+0x1fe/0x9a0
+ xfs_bmapi_allocate+0x34c/0x420
+ xfs_bmapi_write+0x53c/0x9c0
+ xfs_alloc_file_space+0xee/0x320
+ xfs_file_fallocate+0x36b/0x450
+ vfs_fallocate+0x148/0x340
+ __x64_sys_fallocate+0x3c/0x70
+ do_syscall_64+0x35/0x80
+ entry_SYSCALL_64_after_hwframe+0x44/0xa
+
+Why the allocation failed at this point is unknown, but it is likely
+that we ran the transaction out of reserved space and the filesystem
+out of space with bmbt blocks because of all the minlen allocations
+being done, causing worst-case fragmentation of a large allocation.
+
+Regardless of the cause, we've then called xfs_bmapi_finish() which
+calls xfs_btree_del_cursor(cur, error) to tear down the cursor.
+
+So we have a failed operation, error != 0, cur->bc_ino.allocated > 0
+and the filesystem is still up. The assert fails to take into
+account that allocation can fail with an error and the transaction
+teardown will shut the filesystem down if necessary. i.e. the
+assert needs to check "|| error != 0" as well: at this point
+shutdown is pending because the current transaction is dirty....
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_btree.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_btree.c
++++ b/fs/xfs/libxfs/xfs_btree.c
+@@ -372,8 +372,14 @@ xfs_btree_del_cursor(
+ break;
+ }
+
++ /*
++ * If we are doing a BMBT update, the number of unaccounted blocks
++ * allocated during this cursor life time should be zero. If it's not
++ * zero, then we should be shut down or on our way to shutdown due to
++ * cancelling a dirty transaction on error.
++ */
+ ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 ||
+- XFS_FORCED_SHUTDOWN(cur->bc_mp));
++ XFS_FORCED_SHUTDOWN(cur->bc_mp) || error != 0);
+ if (unlikely(cur->bc_flags & XFS_BTREE_STAGING))
+ kmem_free(cur->bc_ops);
+ kmem_cache_free(xfs_btree_cur_zone, cur);
--- /dev/null
+From foo@baz Mon Jun 6 07:00:47 PM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon, 6 Jun 2022 17:32:54 +0300
+Subject: xfs: consider shutdown in bmapbt cursor delete assert
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, Dave Chinner <david@fromorbit.com>, "Darrick J . Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Brian Foster <bfoster@redhat.com>, Christian Brauner <brauner@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Adam Manzanares <a.manzanares@samsung.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org
+Message-ID: <20220606143255.685988-8-amir73il@gmail.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 1cd738b13ae9b29e03d6149f0246c61f76e81fcf upstream.
+
+The assert in xfs_btree_del_cursor() checks that the bmapbt block
+allocation field has been handled correctly before the cursor is
+freed. This field is used for accurate calculation of indirect block
+reservation requirements (for delayed allocations), for example.
+generic/019 reproduces a scenario where this assert fails because
+the filesystem has shutdown while in the middle of a bmbt record
+insertion. This occurs after a bmbt block has been allocated via the
+cursor but before the higher level bmap function (i.e.
+xfs_bmap_add_extent_hole_real()) completes and resets the field.
+
+Update the assert to accommodate the transient state if the
+filesystem has shutdown. While here, clean up the indentation and
+comments in the function.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_btree.c | 33 ++++++++++++---------------------
+ 1 file changed, 12 insertions(+), 21 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_btree.c
++++ b/fs/xfs/libxfs/xfs_btree.c
+@@ -353,20 +353,17 @@ xfs_btree_free_block(
+ */
+ void
+ xfs_btree_del_cursor(
+- xfs_btree_cur_t *cur, /* btree cursor */
+- int error) /* del because of error */
++ struct xfs_btree_cur *cur, /* btree cursor */
++ int error) /* del because of error */
+ {
+- int i; /* btree level */
++ int i; /* btree level */
+
+ /*
+- * Clear the buffer pointers, and release the buffers.
+- * If we're doing this in the face of an error, we
+- * need to make sure to inspect all of the entries
+- * in the bc_bufs array for buffers to be unlocked.
+- * This is because some of the btree code works from
+- * level n down to 0, and if we get an error along
+- * the way we won't have initialized all the entries
+- * down to 0.
++ * Clear the buffer pointers and release the buffers. If we're doing
++ * this because of an error, inspect all of the entries in the bc_bufs
++ * array for buffers to be unlocked. This is because some of the btree
++ * code works from level n down to 0, and if we get an error along the
++ * way we won't have initialized all the entries down to 0.
+ */
+ for (i = 0; i < cur->bc_nlevels; i++) {
+ if (cur->bc_bufs[i])
+@@ -374,17 +371,11 @@ xfs_btree_del_cursor(
+ else if (!error)
+ break;
+ }
+- /*
+- * Can't free a bmap cursor without having dealt with the
+- * allocated indirect blocks' accounting.
+- */
+- ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
+- cur->bc_ino.allocated == 0);
+- /*
+- * Free the cursor.
+- */
++
++ ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 ||
++ XFS_FORCED_SHUTDOWN(cur->bc_mp));
+ if (unlikely(cur->bc_flags & XFS_BTREE_STAGING))
+- kmem_free((void *)cur->bc_ops);
++ kmem_free(cur->bc_ops);
+ kmem_cache_free(xfs_btree_cur_zone, cur);
+ }
+
--- /dev/null
+From foo@baz Mon Jun 6 07:00:47 PM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon, 6 Jun 2022 17:32:50 +0300
+Subject: xfs: fix chown leaking delalloc quota blocks when fssetxattr fails
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, Dave Chinner <david@fromorbit.com>, "Darrick J . Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Brian Foster <bfoster@redhat.com>, Christian Brauner <brauner@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Adam Manzanares <a.manzanares@samsung.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org
+Message-ID: <20220606143255.685988-4-amir73il@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+commit 1aecf3734a95f3c167d1495550ca57556d33f7ec upstream.
+
+While refactoring the quota code to create a function to allocate inode
+change transactions, I noticed that xfs_qm_vop_chown_reserve does more
+than just make reservations: it also *modifies* the incore counts
+directly to handle the owner id change for the delalloc blocks.
+
+I then observed that the fssetxattr code continues validating input
+arguments after making the quota reservation but before dirtying the
+transaction. If the routine decides to error out, it fails to undo the
+accounting switch! This leads to incorrect quota reservation and
+failure down the line.
+
+We can fix this by making the reservation function do only that -- for
+the new dquot, it reserves ondisk and delalloc blocks to the
+transaction, and the old dquot hangs on to its incore reservation for
+now. Once we actually switch the dquots, we can then update the incore
+reservations because we've dirtied the transaction and it's too late to
+turn back now.
+
+No fixes tag because this has been broken since the start of git.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_qm.c | 92 +++++++++++++++++++++-----------------------------------
+ 1 file changed, 35 insertions(+), 57 deletions(-)
+
+--- a/fs/xfs/xfs_qm.c
++++ b/fs/xfs/xfs_qm.c
+@@ -1786,6 +1786,29 @@ xfs_qm_vop_chown(
+ xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
+
+ /*
++ * Back when we made quota reservations for the chown, we reserved the
++ * ondisk blocks + delalloc blocks with the new dquot. Now that we've
++ * switched the dquots, decrease the new dquot's block reservation
++ * (having already bumped up the real counter) so that we don't have
++ * any reservation to give back when we commit.
++ */
++ xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_RES_BLKS,
++ -ip->i_delayed_blks);
++
++ /*
++ * Give the incore reservation for delalloc blocks back to the old
++ * dquot. We don't normally handle delalloc quota reservations
++ * transactionally, so just lock the dquot and subtract from the
++ * reservation. Dirty the transaction because it's too late to turn
++ * back now.
++ */
++ tp->t_flags |= XFS_TRANS_DIRTY;
++ xfs_dqlock(prevdq);
++ ASSERT(prevdq->q_blk.reserved >= ip->i_delayed_blks);
++ prevdq->q_blk.reserved -= ip->i_delayed_blks;
++ xfs_dqunlock(prevdq);
++
++ /*
+ * Take an extra reference, because the inode is going to keep
+ * this dquot pointer even after the trans_commit.
+ */
+@@ -1807,84 +1830,39 @@ xfs_qm_vop_chown_reserve(
+ uint flags)
+ {
+ struct xfs_mount *mp = ip->i_mount;
+- uint64_t delblks;
+ unsigned int blkflags;
+- struct xfs_dquot *udq_unres = NULL;
+- struct xfs_dquot *gdq_unres = NULL;
+- struct xfs_dquot *pdq_unres = NULL;
+ struct xfs_dquot *udq_delblks = NULL;
+ struct xfs_dquot *gdq_delblks = NULL;
+ struct xfs_dquot *pdq_delblks = NULL;
+- int error;
+-
+
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
+ ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
+- delblks = ip->i_delayed_blks;
+ blkflags = XFS_IS_REALTIME_INODE(ip) ?
+ XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
+
+ if (XFS_IS_UQUOTA_ON(mp) && udqp &&
+- i_uid_read(VFS_I(ip)) != udqp->q_id) {
++ i_uid_read(VFS_I(ip)) != udqp->q_id)
+ udq_delblks = udqp;
+- /*
+- * If there are delayed allocation blocks, then we have to
+- * unreserve those from the old dquot, and add them to the
+- * new dquot.
+- */
+- if (delblks) {
+- ASSERT(ip->i_udquot);
+- udq_unres = ip->i_udquot;
+- }
+- }
++
+ if (XFS_IS_GQUOTA_ON(ip->i_mount) && gdqp &&
+- i_gid_read(VFS_I(ip)) != gdqp->q_id) {
++ i_gid_read(VFS_I(ip)) != gdqp->q_id)
+ gdq_delblks = gdqp;
+- if (delblks) {
+- ASSERT(ip->i_gdquot);
+- gdq_unres = ip->i_gdquot;
+- }
+- }
+
+ if (XFS_IS_PQUOTA_ON(ip->i_mount) && pdqp &&
+- ip->i_d.di_projid != pdqp->q_id) {
++ ip->i_d.di_projid != pdqp->q_id)
+ pdq_delblks = pdqp;
+- if (delblks) {
+- ASSERT(ip->i_pdquot);
+- pdq_unres = ip->i_pdquot;
+- }
+- }
+-
+- error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
+- udq_delblks, gdq_delblks, pdq_delblks,
+- ip->i_d.di_nblocks, 1, flags | blkflags);
+- if (error)
+- return error;
+
+ /*
+- * Do the delayed blks reservations/unreservations now. Since, these
+- * are done without the help of a transaction, if a reservation fails
+- * its previous reservations won't be automatically undone by trans
+- * code. So, we have to do it manually here.
++ * Reserve enough quota to handle blocks on disk and reserved for a
++ * delayed allocation. We'll actually transfer the delalloc
++ * reservation between dquots at chown time, even though that part is
++ * only semi-transactional.
+ */
+- if (delblks) {
+- /*
+- * Do the reservations first. Unreservation can't fail.
+- */
+- ASSERT(udq_delblks || gdq_delblks || pdq_delblks);
+- ASSERT(udq_unres || gdq_unres || pdq_unres);
+- error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
+- udq_delblks, gdq_delblks, pdq_delblks,
+- (xfs_qcnt_t)delblks, 0, flags | blkflags);
+- if (error)
+- return error;
+- xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
+- udq_unres, gdq_unres, pdq_unres,
+- -((xfs_qcnt_t)delblks), 0, blkflags);
+- }
+-
+- return 0;
++ return xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, udq_delblks,
++ gdq_delblks, pdq_delblks,
++ ip->i_d.di_nblocks + ip->i_delayed_blks,
++ 1, blkflags | flags);
+ }
+
+ int
--- /dev/null
+From foo@baz Mon Jun 6 07:00:47 PM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon, 6 Jun 2022 17:32:51 +0300
+Subject: xfs: fix incorrect root dquot corruption error when switching group/project quota types
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, Dave Chinner <david@fromorbit.com>, "Darrick J . Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Brian Foster <bfoster@redhat.com>, Christian Brauner <brauner@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Adam Manzanares <a.manzanares@samsung.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org, Chandan Babu R <chandanrlinux@gmail.com>
+Message-ID: <20220606143255.685988-5-amir73il@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+commit 45068063efb7dd0a8d115c106aa05d9ab0946257 upstream.
+
+While writing up a regression test for broken behavior when a chprojid
+request fails, I noticed that we were logging corruption notices about
+the root dquot of the group/project quota file at mount time when
+testing V4 filesystems.
+
+In commit afeda6000b0c, I was trying to improve ondisk dquot validation
+by making sure that when we load an ondisk dquot into memory on behalf
+of an incore dquot, the dquot id and type matches. Unfortunately, I
+forgot that V4 filesystems only have two quota files, and can switch
+that file between group and project quota types at mount time. When we
+perform that switch, we'll try to load the default quota limits from the
+root dquot prior to running quotacheck and log a corruption error when
+the types don't match.
+
+This is inconsequential because quotacheck will reset the second quota
+file as part of doing the switch, but we shouldn't leave scary messages
+in the kernel log.
+
+Fixes: afeda6000b0c ("xfs: validate ondisk/incore dquot flags")
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_dquot.c | 39 +++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 37 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/xfs_dquot.c
++++ b/fs/xfs/xfs_dquot.c
+@@ -500,6 +500,42 @@ xfs_dquot_alloc(
+ return dqp;
+ }
+
++/* Check the ondisk dquot's id and type match what the incore dquot expects. */
++static bool
++xfs_dquot_check_type(
++ struct xfs_dquot *dqp,
++ struct xfs_disk_dquot *ddqp)
++{
++ uint8_t ddqp_type;
++ uint8_t dqp_type;
++
++ ddqp_type = ddqp->d_type & XFS_DQTYPE_REC_MASK;
++ dqp_type = xfs_dquot_type(dqp);
++
++ if (be32_to_cpu(ddqp->d_id) != dqp->q_id)
++ return false;
++
++ /*
++ * V5 filesystems always expect an exact type match. V4 filesystems
++ * expect an exact match for user dquots and for non-root group and
++ * project dquots.
++ */
++ if (xfs_sb_version_hascrc(&dqp->q_mount->m_sb) ||
++ dqp_type == XFS_DQTYPE_USER || dqp->q_id != 0)
++ return ddqp_type == dqp_type;
++
++ /*
++ * V4 filesystems support either group or project quotas, but not both
++ * at the same time. The non-user quota file can be switched between
++ * group and project quota uses depending on the mount options, which
++ * means that we can encounter the other type when we try to load quota
++ * defaults. Quotacheck will soon reset the the entire quota file
++ * (including the root dquot) anyway, but don't log scary corruption
++ * reports to dmesg.
++ */
++ return ddqp_type == XFS_DQTYPE_GROUP || ddqp_type == XFS_DQTYPE_PROJ;
++}
++
+ /* Copy the in-core quota fields in from the on-disk buffer. */
+ STATIC int
+ xfs_dquot_from_disk(
+@@ -512,8 +548,7 @@ xfs_dquot_from_disk(
+ * Ensure that we got the type and ID we were looking for.
+ * Everything else was checked by the dquot buffer verifier.
+ */
+- if ((ddqp->d_type & XFS_DQTYPE_REC_MASK) != xfs_dquot_type(dqp) ||
+- be32_to_cpu(ddqp->d_id) != dqp->q_id) {
++ if (!xfs_dquot_check_type(dqp, ddqp)) {
+ xfs_alert_tag(bp->b_mount, XFS_PTAG_VERIFIER_ERROR,
+ "Metadata corruption detected at %pS, quota %u",
+ __this_address, dqp->q_id);
--- /dev/null
+From foo@baz Mon Jun 6 07:00:47 PM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon, 6 Jun 2022 17:32:53 +0300
+Subject: xfs: force log and push AIL to clear pinned inodes when aborting mount
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, Dave Chinner <david@fromorbit.com>, "Darrick J . Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Brian Foster <bfoster@redhat.com>, Christian Brauner <brauner@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Adam Manzanares <a.manzanares@samsung.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org, Dave Chinner <dchinner@redhat.com>
+Message-ID: <20220606143255.685988-7-amir73il@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+commit d336f7ebc65007f5831e2297e6f3383ae8dbf8ed upstream.
+
+If we allocate quota inodes in the process of mounting a filesystem but
+then decide to abort the mount, it's possible that the quota inodes are
+sitting around pinned by the log. Now that inode reclaim relies on the
+AIL to flush inodes, we have to force the log and push the AIL in
+between releasing the quota inodes and kicking off reclaim to tear down
+all the incore inodes. Do this by extracting the bits we need from the
+unmount path and reusing them. As an added bonus, failed writes during
+a failed mount will not retry forever now.
+
+This was originally found during a fuzz test of metadata directories
+(xfs/1546), but the actual symptom was that reclaim hung up on the quota
+inodes.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_mount.c | 90 +++++++++++++++++++++++++----------------------------
+ 1 file changed, 44 insertions(+), 46 deletions(-)
+
+--- a/fs/xfs/xfs_mount.c
++++ b/fs/xfs/xfs_mount.c
+@@ -632,6 +632,47 @@ xfs_check_summary_counts(
+ }
+
+ /*
++ * Flush and reclaim dirty inodes in preparation for unmount. Inodes and
++ * internal inode structures can be sitting in the CIL and AIL at this point,
++ * so we need to unpin them, write them back and/or reclaim them before unmount
++ * can proceed.
++ *
++ * An inode cluster that has been freed can have its buffer still pinned in
++ * memory because the transaction is still sitting in a iclog. The stale inodes
++ * on that buffer will be pinned to the buffer until the transaction hits the
++ * disk and the callbacks run. Pushing the AIL will skip the stale inodes and
++ * may never see the pinned buffer, so nothing will push out the iclog and
++ * unpin the buffer.
++ *
++ * Hence we need to force the log to unpin everything first. However, log
++ * forces don't wait for the discards they issue to complete, so we have to
++ * explicitly wait for them to complete here as well.
++ *
++ * Then we can tell the world we are unmounting so that error handling knows
++ * that the filesystem is going away and we should error out anything that we
++ * have been retrying in the background. This will prevent never-ending
++ * retries in AIL pushing from hanging the unmount.
++ *
++ * Finally, we can push the AIL to clean all the remaining dirty objects, then
++ * reclaim the remaining inodes that are still in memory at this point in time.
++ */
++static void
++xfs_unmount_flush_inodes(
++ struct xfs_mount *mp)
++{
++ xfs_log_force(mp, XFS_LOG_SYNC);
++ xfs_extent_busy_wait_all(mp);
++ flush_workqueue(xfs_discard_wq);
++
++ mp->m_flags |= XFS_MOUNT_UNMOUNTING;
++
++ xfs_ail_push_all_sync(mp->m_ail);
++ cancel_delayed_work_sync(&mp->m_reclaim_work);
++ xfs_reclaim_inodes(mp);
++ xfs_health_unmount(mp);
++}
++
++/*
+ * This function does the following on an initial mount of a file system:
+ * - reads the superblock from disk and init the mount struct
+ * - if we're a 32-bit kernel, do a size check on the superblock
+@@ -1005,7 +1046,7 @@ xfs_mountfs(
+ /* Clean out dquots that might be in memory after quotacheck. */
+ xfs_qm_unmount(mp);
+ /*
+- * Cancel all delayed reclaim work and reclaim the inodes directly.
++ * Flush all inode reclamation work and flush the log.
+ * We have to do this /after/ rtunmount and qm_unmount because those
+ * two will have scheduled delayed reclaim for the rt/quota inodes.
+ *
+@@ -1015,11 +1056,8 @@ xfs_mountfs(
+ * qm_unmount_quotas and therefore rely on qm_unmount to release the
+ * quota inodes.
+ */
+- cancel_delayed_work_sync(&mp->m_reclaim_work);
+- xfs_reclaim_inodes(mp);
+- xfs_health_unmount(mp);
++ xfs_unmount_flush_inodes(mp);
+ out_log_dealloc:
+- mp->m_flags |= XFS_MOUNT_UNMOUNTING;
+ xfs_log_mount_cancel(mp);
+ out_fail_wait:
+ if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
+@@ -1060,47 +1098,7 @@ xfs_unmountfs(
+ xfs_rtunmount_inodes(mp);
+ xfs_irele(mp->m_rootip);
+
+- /*
+- * We can potentially deadlock here if we have an inode cluster
+- * that has been freed has its buffer still pinned in memory because
+- * the transaction is still sitting in a iclog. The stale inodes
+- * on that buffer will be pinned to the buffer until the
+- * transaction hits the disk and the callbacks run. Pushing the AIL will
+- * skip the stale inodes and may never see the pinned buffer, so
+- * nothing will push out the iclog and unpin the buffer. Hence we
+- * need to force the log here to ensure all items are flushed into the
+- * AIL before we go any further.
+- */
+- xfs_log_force(mp, XFS_LOG_SYNC);
+-
+- /*
+- * Wait for all busy extents to be freed, including completion of
+- * any discard operation.
+- */
+- xfs_extent_busy_wait_all(mp);
+- flush_workqueue(xfs_discard_wq);
+-
+- /*
+- * We now need to tell the world we are unmounting. This will allow
+- * us to detect that the filesystem is going away and we should error
+- * out anything that we have been retrying in the background. This will
+- * prevent neverending retries in AIL pushing from hanging the unmount.
+- */
+- mp->m_flags |= XFS_MOUNT_UNMOUNTING;
+-
+- /*
+- * Flush all pending changes from the AIL.
+- */
+- xfs_ail_push_all_sync(mp->m_ail);
+-
+- /*
+- * Reclaim all inodes. At this point there should be no dirty inodes and
+- * none should be pinned or locked. Stop background inode reclaim here
+- * if it is still running.
+- */
+- cancel_delayed_work_sync(&mp->m_reclaim_work);
+- xfs_reclaim_inodes(mp);
+- xfs_health_unmount(mp);
++ xfs_unmount_flush_inodes(mp);
+
+ xfs_qm_unmount(mp);
+
--- /dev/null
+From foo@baz Mon Jun 6 07:00:47 PM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon, 6 Jun 2022 17:32:52 +0300
+Subject: xfs: restore shutdown check in mapped write fault path
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, Dave Chinner <david@fromorbit.com>, "Darrick J . Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Brian Foster <bfoster@redhat.com>, Christian Brauner <brauner@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Adam Manzanares <a.manzanares@samsung.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org, Eric Sandeen <sandeen@redhat.com>
+Message-ID: <20220606143255.685988-6-amir73il@gmail.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit e4826691cc7e5458bcb659935d0092bcf3f08c20 upstream.
+
+XFS triggers an iomap warning in the write fault path due to a
+!PageUptodate() page if a write fault happens to occur on a page
+that recently failed writeback. The iomap writeback error handling
+code can clear the Uptodate flag if no portion of the page is
+submitted for I/O. This is reproduced by fstest generic/019, which
+combines various forms of I/O with simulated disk failures that
+inevitably lead to filesystem shutdown (which then unconditionally
+fails page writeback).
+
+This is a regression introduced by commit f150b4234397 ("xfs: split
+the iomap ops for buffered vs direct writes") due to the removal of
+a shutdown check and explicit error return in the ->iomap_begin()
+path used by the write fault path. The explicit error return
+historically translated to a SIGBUS, but now carries on with iomap
+processing where it complains about the unexpected state. Restore
+the shutdown check to xfs_buffered_write_iomap_begin() to restore
+historical behavior.
+
+Fixes: f150b4234397 ("xfs: split the iomap ops for buffered vs direct writes")
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_iomap.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/xfs/xfs_iomap.c
++++ b/fs/xfs/xfs_iomap.c
+@@ -870,6 +870,9 @@ xfs_buffered_write_iomap_begin(
+ int allocfork = XFS_DATA_FORK;
+ int error = 0;
+
++ if (XFS_FORCED_SHUTDOWN(mp))
++ return -EIO;
++
+ /* we can't use delayed allocations when using extent size hints */
+ if (xfs_get_extsz_hint(ip))
+ return xfs_direct_write_iomap_begin(inode, offset, count,
--- /dev/null
+From foo@baz Mon Jun 6 07:00:47 PM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon, 6 Jun 2022 17:32:48 +0300
+Subject: xfs: set inode size after creating symlink
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, Dave Chinner <david@fromorbit.com>, "Darrick J . Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Brian Foster <bfoster@redhat.com>, Christian Brauner <brauner@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Adam Manzanares <a.manzanares@samsung.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org, Jeffrey Mitchell <jeffrey.mitchell@starlab.io>
+Message-ID: <20220606143255.685988-2-amir73il@gmail.com>
+
+From: Jeffrey Mitchell <jeffrey.mitchell@starlab.io>
+
+commit 8aa921a95335d0a8c8e2be35a44467e7c91ec3e4 upstream.
+
+When XFS creates a new symlink, it writes its size to disk but not to the
+VFS inode. This causes i_size_read() to return 0 for that symlink until
+it is re-read from disk, for example when the system is rebooted.
+
+I found this inconsistency while protecting directories with eCryptFS.
+The command "stat path/to/symlink/in/ecryptfs" will report "Size: 0" if
+the symlink was created after the last reboot on an XFS root.
+
+Call i_size_write() in xfs_symlink()
+
+Signed-off-by: Jeffrey Mitchell <jeffrey.mitchell@starlab.io>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_symlink.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/xfs/xfs_symlink.c
++++ b/fs/xfs/xfs_symlink.c
+@@ -300,6 +300,7 @@ xfs_symlink(
+ }
+ ASSERT(pathlen == 0);
+ }
++ i_size_write(VFS_I(ip), ip->i_d.di_size);
+
+ /*
+ * Create the directory entry for the symlink.
--- /dev/null
+From foo@baz Mon Jun 6 07:00:47 PM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon, 6 Jun 2022 17:32:49 +0300
+Subject: xfs: sync lazy sb accounting on quiesce of read-only mounts
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, Dave Chinner <david@fromorbit.com>, "Darrick J . Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Brian Foster <bfoster@redhat.com>, Christian Brauner <brauner@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Adam Manzanares <a.manzanares@samsung.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org, Gao Xiang <hsiangkao@redhat.com>, Allison Henderson <allison.henderson@oracle.com>, "Darrick J . Wong" <darrick.wong@oracle.com>, Bill O'Donnell <billodo@redhat.com>
+Message-ID: <20220606143255.685988-3-amir73il@gmail.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 50d25484bebe94320c49dd1347d3330c7063bbdb upstream.
+
+xfs_log_sbcount() syncs the superblock specifically to accumulate
+the in-core percpu superblock counters and commit them to disk. This
+is required to maintain filesystem consistency across quiesce
+(freeze, read-only mount/remount) or unmount when lazy superblock
+accounting is enabled because individual transactions do not update
+the superblock directly.
+
+This mechanism works as expected for writable mounts, but
+xfs_log_sbcount() skips the update for read-only mounts. Read-only
+mounts otherwise still allow log recovery and write out an unmount
+record during log quiesce. If a read-only mount performs log
+recovery, it can modify the in-core superblock counters and write an
+unmount record when the filesystem unmounts without ever syncing the
+in-core counters. This leaves the filesystem with a clean log but in
+an inconsistent state with regard to lazy sb counters.
+
+Update xfs_log_sbcount() to use the same logic
+xfs_log_unmount_write() uses to determine when to write an unmount
+record. This ensures that lazy accounting is always synced before
+the log is cleaned. Refactor this logic into a new helper to
+distinguish between a writable filesystem and a writable log.
+Specifically, the log is writable unless the filesystem is mounted
+with the norecovery mount option, the underlying log device is
+read-only, or the filesystem is shutdown. Drop the freeze state
+check because the update is already allowed during the freezing
+process and no context calls this function on an already frozen fs.
+Also, retain the shutdown check in xfs_log_unmount_write() to catch
+the case where the preceding log force might have triggered a
+shutdown.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Gao Xiang <hsiangkao@redhat.com>
+Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Bill O'Donnell <billodo@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_log.c | 28 ++++++++++++++++++++--------
+ fs/xfs/xfs_log.h | 1 +
+ fs/xfs/xfs_mount.c | 3 +--
+ 3 files changed, 22 insertions(+), 10 deletions(-)
+
+--- a/fs/xfs/xfs_log.c
++++ b/fs/xfs/xfs_log.c
+@@ -347,6 +347,25 @@ xlog_tic_add_region(xlog_ticket_t *tic,
+ tic->t_res_num++;
+ }
+
++bool
++xfs_log_writable(
++ struct xfs_mount *mp)
++{
++ /*
++ * Never write to the log on norecovery mounts, if the block device is
++ * read-only, or if the filesystem is shutdown. Read-only mounts still
++ * allow internal writes for log recovery and unmount purposes, so don't
++ * restrict that case here.
++ */
++ if (mp->m_flags & XFS_MOUNT_NORECOVERY)
++ return false;
++ if (xfs_readonly_buftarg(mp->m_log->l_targ))
++ return false;
++ if (XFS_FORCED_SHUTDOWN(mp))
++ return false;
++ return true;
++}
++
+ /*
+ * Replenish the byte reservation required by moving the grant write head.
+ */
+@@ -886,15 +905,8 @@ xfs_log_unmount_write(
+ {
+ struct xlog *log = mp->m_log;
+
+- /*
+- * Don't write out unmount record on norecovery mounts or ro devices.
+- * Or, if we are doing a forced umount (typically because of IO errors).
+- */
+- if (mp->m_flags & XFS_MOUNT_NORECOVERY ||
+- xfs_readonly_buftarg(log->l_targ)) {
+- ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
++ if (!xfs_log_writable(mp))
+ return;
+- }
+
+ xfs_log_force(mp, XFS_LOG_SYNC);
+
+--- a/fs/xfs/xfs_log.h
++++ b/fs/xfs/xfs_log.h
+@@ -127,6 +127,7 @@ int xfs_log_reserve(struct xfs_mount *
+ int xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
+ void xfs_log_unmount(struct xfs_mount *mp);
+ int xfs_log_force_umount(struct xfs_mount *mp, int logerror);
++bool xfs_log_writable(struct xfs_mount *mp);
+
+ struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
+ void xfs_log_ticket_put(struct xlog_ticket *ticket);
+--- a/fs/xfs/xfs_mount.c
++++ b/fs/xfs/xfs_mount.c
+@@ -1176,8 +1176,7 @@ xfs_fs_writable(
+ int
+ xfs_log_sbcount(xfs_mount_t *mp)
+ {
+- /* allow this to proceed during the freeze sequence... */
+- if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
++ if (!xfs_log_writable(mp))
+ return 0;
+
+ /*