From 32db5295d06796e49d362fdbcd41a98982ab4f0a Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 6 Jun 2022 19:02:20 +0200
Subject: [PATCH] 5.10-stable patches

added patches:
	net-ipa-fix-page-free-in-ipa_endpoint_replenish_one.patch
	net-ipa-fix-page-free-in-ipa_endpoint_trans_release.patch
	xfs-assert-in-xfs_btree_del_cursor-should-take-into-account-error.patch
	xfs-consider-shutdown-in-bmapbt-cursor-delete-assert.patch
	xfs-fix-chown-leaking-delalloc-quota-blocks-when-fssetxattr-fails.patch
	xfs-fix-incorrect-root-dquot-corruption-error-when-switching-group-project-quota-types.patch
	xfs-force-log-and-push-ail-to-clear-pinned-inodes-when-aborting-mount.patch
	xfs-restore-shutdown-check-in-mapped-write-fault-path.patch
	xfs-set-inode-size-after-creating-symlink.patch
	xfs-sync-lazy-sb-accounting-on-quiesce-of-read-only-mounts.patch
---
 ...e-free-in-ipa_endpoint_replenish_one.patch |  38 ++++
 ...e-free-in-ipa_endpoint_trans_release.patch |  38 ++++
 queue-5.10/series                             |  10 ++
 ...ursor-should-take-into-account-error.patch |  82 +++++++++
 ...tdown-in-bmapbt-cursor-delete-assert.patch |  88 +++++++++
 ...c-quota-blocks-when-fssetxattr-fails.patch | 170 ++++++++++++++++++
 ...-switching-group-project-quota-types.patch |  95 ++++++++++
 ...ar-pinned-inodes-when-aborting-mount.patch | 155 ++++++++++++++++
 ...own-check-in-mapped-write-fault-path.patch |  53 ++++++
 ...et-inode-size-after-creating-symlink.patch |  43 +++++
 ...nting-on-quiesce-of-read-only-mounts.patch | 124 +++++++++++++
 11 files changed, 896 insertions(+)
 create mode 100644 queue-5.10/net-ipa-fix-page-free-in-ipa_endpoint_replenish_one.patch
 create mode 100644 queue-5.10/net-ipa-fix-page-free-in-ipa_endpoint_trans_release.patch
 create mode 100644 queue-5.10/xfs-assert-in-xfs_btree_del_cursor-should-take-into-account-error.patch
 create mode 100644 queue-5.10/xfs-consider-shutdown-in-bmapbt-cursor-delete-assert.patch
 create mode 100644 queue-5.10/xfs-fix-chown-leaking-delalloc-quota-blocks-when-fssetxattr-fails.patch
 create mode 100644 queue-5.10/xfs-fix-incorrect-root-dquot-corruption-error-when-switching-group-project-quota-types.patch
 create mode 100644 queue-5.10/xfs-force-log-and-push-ail-to-clear-pinned-inodes-when-aborting-mount.patch
 create mode 100644 queue-5.10/xfs-restore-shutdown-check-in-mapped-write-fault-path.patch
 create mode 100644 queue-5.10/xfs-set-inode-size-after-creating-symlink.patch
 create mode 100644 queue-5.10/xfs-sync-lazy-sb-accounting-on-quiesce-of-read-only-mounts.patch

diff --git a/queue-5.10/net-ipa-fix-page-free-in-ipa_endpoint_replenish_one.patch b/queue-5.10/net-ipa-fix-page-free-in-ipa_endpoint_replenish_one.patch
new file mode 100644
index 00000000000..f160cd7f705
--- /dev/null
+++ b/queue-5.10/net-ipa-fix-page-free-in-ipa_endpoint_replenish_one.patch
@@ -0,0 +1,38 @@
+From 70132763d5d2e94cd185e3aa92ac6a3ba89068fa Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@linaro.org>
+Date: Thu, 26 May 2022 10:23:14 -0500
+Subject: net: ipa: fix page free in ipa_endpoint_replenish_one()
+
+From: Alex Elder <elder@linaro.org>
+
+commit 70132763d5d2e94cd185e3aa92ac6a3ba89068fa upstream.
+
+Currently the (possibly compound) pages used for receive buffers are
+freed using __free_pages().  But according to this comment above the
+definition of that function, that's wrong:
+    If you want to use the page's reference count to decide
+    when to free the allocation, you should allocate a compound
+    page, and use put_page() instead of __free_pages().
+
+Convert the call to __free_pages() in ipa_endpoint_replenish_one()
+to use put_page() instead.
+
+Fixes: 6a606b90153b8 ("net: ipa: allocate transaction in replenish loop")
+Signed-off-by: Alex Elder <elder@linaro.org>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ipa/ipa_endpoint.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ipa/ipa_endpoint.c
++++ b/drivers/net/ipa/ipa_endpoint.c
+@@ -884,7 +884,7 @@ static int ipa_endpoint_replenish_one(st
+ err_trans_free:
+ 	gsi_trans_free(trans);
+ err_free_pages:
+-	__free_pages(page, get_order(IPA_RX_BUFFER_SIZE));
++	put_page(page);
+ 
+ 	return -ENOMEM;
+ }
diff --git a/queue-5.10/net-ipa-fix-page-free-in-ipa_endpoint_trans_release.patch b/queue-5.10/net-ipa-fix-page-free-in-ipa_endpoint_trans_release.patch
new file mode 100644
index 00000000000..adbd9828f45
--- /dev/null
+++ b/queue-5.10/net-ipa-fix-page-free-in-ipa_endpoint_trans_release.patch
@@ -0,0 +1,38 @@
+From 155c0c90bca918de6e4327275dfc1d97fd604115 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@linaro.org>
+Date: Thu, 26 May 2022 10:23:13 -0500
+Subject: net: ipa: fix page free in ipa_endpoint_trans_release()
+
+From: Alex Elder <elder@linaro.org>
+
+commit 155c0c90bca918de6e4327275dfc1d97fd604115 upstream.
+
+Currently the (possibly compound) pages used for receive buffers are
+freed using __free_pages().  But according to this comment above the
+definition of that function, that's wrong:
+    If you want to use the page's reference count to decide when
+    to free the allocation, you should allocate a compound page,
+    and use put_page() instead of __free_pages().
+
+Convert the call to __free_pages() in ipa_endpoint_trans_release()
+to use put_page() instead.
+
+Fixes: ed23f02680caa ("net: ipa: define per-endpoint receive buffer size")
+Signed-off-by: Alex Elder <elder@linaro.org>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ipa/ipa_endpoint.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ipa/ipa_endpoint.c
++++ b/drivers/net/ipa/ipa_endpoint.c
+@@ -1179,7 +1179,7 @@ void ipa_endpoint_trans_release(struct i
+ 		struct page *page = trans->data;
+ 
+ 		if (page)
+-			__free_pages(page, get_order(IPA_RX_BUFFER_SIZE));
++			put_page(page);
+ 	}
+ }
+ 
diff --git a/queue-5.10/series b/queue-5.10/series
index 6ca435b1897..9f1f357df0d 100644
--- a/queue-5.10/series
+++ b/queue-5.10/series
@@ -425,3 +425,13 @@ vdpasim-allow-to-enable-a-vq-repeatedly.patch
 blk-iolatency-fix-inflight-count-imbalances-and-io-hangs-on-offline.patch
 coresight-core-fix-coresight-device-probe-failure-issue.patch
 phy-qcom-qmp-fix-reset-controller-leak-on-probe-errors.patch
+net-ipa-fix-page-free-in-ipa_endpoint_trans_release.patch
+net-ipa-fix-page-free-in-ipa_endpoint_replenish_one.patch
+xfs-set-inode-size-after-creating-symlink.patch
+xfs-sync-lazy-sb-accounting-on-quiesce-of-read-only-mounts.patch
+xfs-fix-chown-leaking-delalloc-quota-blocks-when-fssetxattr-fails.patch
+xfs-fix-incorrect-root-dquot-corruption-error-when-switching-group-project-quota-types.patch
+xfs-restore-shutdown-check-in-mapped-write-fault-path.patch
+xfs-force-log-and-push-ail-to-clear-pinned-inodes-when-aborting-mount.patch
+xfs-consider-shutdown-in-bmapbt-cursor-delete-assert.patch
+xfs-assert-in-xfs_btree_del_cursor-should-take-into-account-error.patch
diff --git a/queue-5.10/xfs-assert-in-xfs_btree_del_cursor-should-take-into-account-error.patch b/queue-5.10/xfs-assert-in-xfs_btree_del_cursor-should-take-into-account-error.patch
new file mode 100644
index 00000000000..58d90ffef17
--- /dev/null
+++ b/queue-5.10/xfs-assert-in-xfs_btree_del_cursor-should-take-into-account-error.patch
@@ -0,0 +1,82 @@
+From foo@baz Mon Jun  6 07:00:47 PM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon,  6 Jun 2022 17:32:55 +0300
+Subject: xfs: assert in xfs_btree_del_cursor should take into account error
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, Dave Chinner <david@fromorbit.com>, "Darrick J . Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Brian Foster <bfoster@redhat.com>, Christian Brauner <brauner@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Adam Manzanares <a.manzanares@samsung.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org, Dave Chinner <dchinner@redhat.com>
+Message-ID: <20220606143255.685988-9-amir73il@gmail.com>
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 56486f307100e8fc66efa2ebd8a71941fa10bf6f upstream.
+
+xfs/538 on a 1kB block filesystem failed with this assert:
+
+XFS: Assertion failed: cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 || xfs_is_shutdown(cur->bc_mp), file: fs/xfs/libxfs/xfs_btree.c, line: 448
+
+The problem was that an allocation failed unexpectedly in
+xfs_bmbt_alloc_block() after roughly 150,000 minlen allocation error
+injections, resulting in an EFSCORRUPTED error being returned to
+xfs_bmapi_write(). The error occurred on extent-to-btree format
+conversion allocating the new root block:
+
+ RIP: 0010:xfs_bmbt_alloc_block+0x177/0x210
+ Call Trace:
+  <TASK>
+  xfs_btree_new_iroot+0xdf/0x520
+  xfs_btree_make_block_unfull+0x10d/0x1c0
+  xfs_btree_insrec+0x364/0x790
+  xfs_btree_insert+0xaa/0x210
+  xfs_bmap_add_extent_hole_real+0x1fe/0x9a0
+  xfs_bmapi_allocate+0x34c/0x420
+  xfs_bmapi_write+0x53c/0x9c0
+  xfs_alloc_file_space+0xee/0x320
+  xfs_file_fallocate+0x36b/0x450
+  vfs_fallocate+0x148/0x340
+  __x64_sys_fallocate+0x3c/0x70
+  do_syscall_64+0x35/0x80
+  entry_SYSCALL_64_after_hwframe+0x44/0xa
+
+Why the allocation failed at this point is unknown, but it is likely
+that we ran the transaction out of reserved space and filesystem out
+of space with bmbt blocks because of all the minlen allocations
+being done causing worst case fragmentation of a large allocation.
+
+Regardless of the cause, we've then called xfs_bmapi_finish() which
+calls xfs_btree_del_cursor(cur, error) to tear down the cursor.
+
+So we have a failed operation, error != 0, cur->bc_ino.allocated > 0
+and the filesystem is still up. The assert fails to take into
+account that allocation can fail with an error and the transaction
+teardown will shut the filesystem down if necessary. i.e. the
+assert needs to check "|| error != 0" as well, because at this point
+shutdown is pending because the current transaction is dirty....
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_btree.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_btree.c
++++ b/fs/xfs/libxfs/xfs_btree.c
+@@ -372,8 +372,14 @@ xfs_btree_del_cursor(
+ 			break;
+ 	}
+ 
++	/*
++	 * If we are doing a BMBT update, the number of unaccounted blocks
++	 * allocated during this cursor life time should be zero. If it's not
++	 * zero, then we should be shut down or on our way to shutdown due to
++	 * cancelling a dirty transaction on error.
++	 */
+ 	ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 ||
+-	       XFS_FORCED_SHUTDOWN(cur->bc_mp));
++	       XFS_FORCED_SHUTDOWN(cur->bc_mp) || error != 0);
+ 	if (unlikely(cur->bc_flags & XFS_BTREE_STAGING))
+ 		kmem_free(cur->bc_ops);
+ 	kmem_cache_free(xfs_btree_cur_zone, cur);
diff --git a/queue-5.10/xfs-consider-shutdown-in-bmapbt-cursor-delete-assert.patch b/queue-5.10/xfs-consider-shutdown-in-bmapbt-cursor-delete-assert.patch
new file mode 100644
index 00000000000..7eafcf4b688
--- /dev/null
+++ b/queue-5.10/xfs-consider-shutdown-in-bmapbt-cursor-delete-assert.patch
@@ -0,0 +1,88 @@
+From foo@baz Mon Jun  6 07:00:47 PM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon,  6 Jun 2022 17:32:54 +0300
+Subject: xfs: consider shutdown in bmapbt cursor delete assert
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, Dave Chinner <david@fromorbit.com>, "Darrick J . Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Brian Foster <bfoster@redhat.com>, Christian Brauner <brauner@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Adam Manzanares <a.manzanares@samsung.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org
+Message-ID: <20220606143255.685988-8-amir73il@gmail.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 1cd738b13ae9b29e03d6149f0246c61f76e81fcf upstream.
+
+The assert in xfs_btree_del_cursor() checks that the bmapbt block
+allocation field has been handled correctly before the cursor is
+freed. This field is used for accurate calculation of indirect block
+reservation requirements (for delayed allocations), for example.
+generic/019 reproduces a scenario where this assert fails because
+the filesystem has shutdown while in the middle of a bmbt record
+insertion. This occurs after a bmbt block has been allocated via the
+cursor but before the higher level bmap function (i.e.
+xfs_bmap_add_extent_hole_real()) completes and resets the field.
+
+Update the assert to accommodate the transient state if the
+filesystem has shutdown. While here, clean up the indentation and
+comments in the function.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_btree.c |   33 ++++++++++++---------------------
+ 1 file changed, 12 insertions(+), 21 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_btree.c
++++ b/fs/xfs/libxfs/xfs_btree.c
+@@ -353,20 +353,17 @@ xfs_btree_free_block(
+  */
+ void
+ xfs_btree_del_cursor(
+-	xfs_btree_cur_t	*cur,		/* btree cursor */
+-	int		error)		/* del because of error */
++	struct xfs_btree_cur	*cur,		/* btree cursor */
++	int			error)		/* del because of error */
+ {
+-	int		i;		/* btree level */
++	int			i;		/* btree level */
+ 
+ 	/*
+-	 * Clear the buffer pointers, and release the buffers.
+-	 * If we're doing this in the face of an error, we
+-	 * need to make sure to inspect all of the entries
+-	 * in the bc_bufs array for buffers to be unlocked.
+-	 * This is because some of the btree code works from
+-	 * level n down to 0, and if we get an error along
+-	 * the way we won't have initialized all the entries
+-	 * down to 0.
++	 * Clear the buffer pointers and release the buffers. If we're doing
++	 * this because of an error, inspect all of the entries in the bc_bufs
++	 * array for buffers to be unlocked. This is because some of the btree
++	 * code works from level n down to 0, and if we get an error along the
++	 * way we won't have initialized all the entries down to 0.
+ 	 */
+ 	for (i = 0; i < cur->bc_nlevels; i++) {
+ 		if (cur->bc_bufs[i])
+@@ -374,17 +371,11 @@ xfs_btree_del_cursor(
+ 		else if (!error)
+ 			break;
+ 	}
+-	/*
+-	 * Can't free a bmap cursor without having dealt with the
+-	 * allocated indirect blocks' accounting.
+-	 */
+-	ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
+-	       cur->bc_ino.allocated == 0);
+-	/*
+-	 * Free the cursor.
+-	 */
++
++	ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 ||
++	       XFS_FORCED_SHUTDOWN(cur->bc_mp));
+ 	if (unlikely(cur->bc_flags & XFS_BTREE_STAGING))
+-		kmem_free((void *)cur->bc_ops);
++		kmem_free(cur->bc_ops);
+ 	kmem_cache_free(xfs_btree_cur_zone, cur);
+ }
+ 
diff --git a/queue-5.10/xfs-fix-chown-leaking-delalloc-quota-blocks-when-fssetxattr-fails.patch b/queue-5.10/xfs-fix-chown-leaking-delalloc-quota-blocks-when-fssetxattr-fails.patch
new file mode 100644
index 00000000000..1597f941ac4
--- /dev/null
+++ b/queue-5.10/xfs-fix-chown-leaking-delalloc-quota-blocks-when-fssetxattr-fails.patch
@@ -0,0 +1,170 @@
+From foo@baz Mon Jun  6 07:00:47 PM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon,  6 Jun 2022 17:32:50 +0300
+Subject: xfs: fix chown leaking delalloc quota blocks when fssetxattr fails
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, Dave Chinner <david@fromorbit.com>, "Darrick J . Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Brian Foster <bfoster@redhat.com>, Christian Brauner <brauner@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Adam Manzanares <a.manzanares@samsung.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org
+Message-ID: <20220606143255.685988-4-amir73il@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+commit 1aecf3734a95f3c167d1495550ca57556d33f7ec upstream.
+
+While refactoring the quota code to create a function to allocate inode
+change transactions, I noticed that xfs_qm_vop_chown_reserve does more
+than just make reservations: it also *modifies* the incore counts
+directly to handle the owner id change for the delalloc blocks.
+
+I then observed that the fssetxattr code continues validating input
+arguments after making the quota reservation but before dirtying the
+transaction.  If the routine decides to error out, it fails to undo the
+accounting switch!  This leads to incorrect quota reservation and
+failure down the line.
+
+We can fix this by making the reservation function do only that -- for
+the new dquot, it reserves ondisk and delalloc blocks to the
+transaction, and the old dquot hangs on to its incore reservation for
+now.  Once we actually switch the dquots, we can then update the incore
+reservations because we've dirtied the transaction and it's too late to
+turn back now.
+
+No fixes tag because this has been broken since the start of git.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_qm.c |   92 +++++++++++++++++++++-----------------------------------
+ 1 file changed, 35 insertions(+), 57 deletions(-)
+
+--- a/fs/xfs/xfs_qm.c
++++ b/fs/xfs/xfs_qm.c
+@@ -1786,6 +1786,29 @@ xfs_qm_vop_chown(
+ 	xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
+ 
+ 	/*
++	 * Back when we made quota reservations for the chown, we reserved the
++	 * ondisk blocks + delalloc blocks with the new dquot.  Now that we've
++	 * switched the dquots, decrease the new dquot's block reservation
++	 * (having already bumped up the real counter) so that we don't have
++	 * any reservation to give back when we commit.
++	 */
++	xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_RES_BLKS,
++			-ip->i_delayed_blks);
++
++	/*
++	 * Give the incore reservation for delalloc blocks back to the old
++	 * dquot.  We don't normally handle delalloc quota reservations
++	 * transactionally, so just lock the dquot and subtract from the
++	 * reservation.  Dirty the transaction because it's too late to turn
++	 * back now.
++	 */
++	tp->t_flags |= XFS_TRANS_DIRTY;
++	xfs_dqlock(prevdq);
++	ASSERT(prevdq->q_blk.reserved >= ip->i_delayed_blks);
++	prevdq->q_blk.reserved -= ip->i_delayed_blks;
++	xfs_dqunlock(prevdq);
++
++	/*
+ 	 * Take an extra reference, because the inode is going to keep
+ 	 * this dquot pointer even after the trans_commit.
+ 	 */
+@@ -1807,84 +1830,39 @@ xfs_qm_vop_chown_reserve(
+ 	uint			flags)
+ {
+ 	struct xfs_mount	*mp = ip->i_mount;
+-	uint64_t		delblks;
+ 	unsigned int		blkflags;
+-	struct xfs_dquot	*udq_unres = NULL;
+-	struct xfs_dquot	*gdq_unres = NULL;
+-	struct xfs_dquot	*pdq_unres = NULL;
+ 	struct xfs_dquot	*udq_delblks = NULL;
+ 	struct xfs_dquot	*gdq_delblks = NULL;
+ 	struct xfs_dquot	*pdq_delblks = NULL;
+-	int			error;
+-
+ 
+ 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
+ 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+ 
+-	delblks = ip->i_delayed_blks;
+ 	blkflags = XFS_IS_REALTIME_INODE(ip) ?
+ 			XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
+ 
+ 	if (XFS_IS_UQUOTA_ON(mp) && udqp &&
+-	    i_uid_read(VFS_I(ip)) != udqp->q_id) {
++	    i_uid_read(VFS_I(ip)) != udqp->q_id)
+ 		udq_delblks = udqp;
+-		/*
+-		 * If there are delayed allocation blocks, then we have to
+-		 * unreserve those from the old dquot, and add them to the
+-		 * new dquot.
+-		 */
+-		if (delblks) {
+-			ASSERT(ip->i_udquot);
+-			udq_unres = ip->i_udquot;
+-		}
+-	}
++
+ 	if (XFS_IS_GQUOTA_ON(ip->i_mount) && gdqp &&
+-	    i_gid_read(VFS_I(ip)) != gdqp->q_id) {
++	    i_gid_read(VFS_I(ip)) != gdqp->q_id)
+ 		gdq_delblks = gdqp;
+-		if (delblks) {
+-			ASSERT(ip->i_gdquot);
+-			gdq_unres = ip->i_gdquot;
+-		}
+-	}
+ 
+ 	if (XFS_IS_PQUOTA_ON(ip->i_mount) && pdqp &&
+-	    ip->i_d.di_projid != pdqp->q_id) {
++	    ip->i_d.di_projid != pdqp->q_id)
+ 		pdq_delblks = pdqp;
+-		if (delblks) {
+-			ASSERT(ip->i_pdquot);
+-			pdq_unres = ip->i_pdquot;
+-		}
+-	}
+-
+-	error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
+-				udq_delblks, gdq_delblks, pdq_delblks,
+-				ip->i_d.di_nblocks, 1, flags | blkflags);
+-	if (error)
+-		return error;
+ 
+ 	/*
+-	 * Do the delayed blks reservations/unreservations now. Since, these
+-	 * are done without the help of a transaction, if a reservation fails
+-	 * its previous reservations won't be automatically undone by trans
+-	 * code. So, we have to do it manually here.
++	 * Reserve enough quota to handle blocks on disk and reserved for a
++	 * delayed allocation.  We'll actually transfer the delalloc
++	 * reservation between dquots at chown time, even though that part is
++	 * only semi-transactional.
+ 	 */
+-	if (delblks) {
+-		/*
+-		 * Do the reservations first. Unreservation can't fail.
+-		 */
+-		ASSERT(udq_delblks || gdq_delblks || pdq_delblks);
+-		ASSERT(udq_unres || gdq_unres || pdq_unres);
+-		error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
+-			    udq_delblks, gdq_delblks, pdq_delblks,
+-			    (xfs_qcnt_t)delblks, 0, flags | blkflags);
+-		if (error)
+-			return error;
+-		xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
+-				udq_unres, gdq_unres, pdq_unres,
+-				-((xfs_qcnt_t)delblks), 0, blkflags);
+-	}
+-
+-	return 0;
++	return xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, udq_delblks,
++			gdq_delblks, pdq_delblks,
++			ip->i_d.di_nblocks + ip->i_delayed_blks,
++			1, blkflags | flags);
+ }
+ 
+ int
diff --git a/queue-5.10/xfs-fix-incorrect-root-dquot-corruption-error-when-switching-group-project-quota-types.patch b/queue-5.10/xfs-fix-incorrect-root-dquot-corruption-error-when-switching-group-project-quota-types.patch
new file mode 100644
index 00000000000..b04b4fdadd2
--- /dev/null
+++ b/queue-5.10/xfs-fix-incorrect-root-dquot-corruption-error-when-switching-group-project-quota-types.patch
@@ -0,0 +1,95 @@
+From foo@baz Mon Jun  6 07:00:47 PM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon,  6 Jun 2022 17:32:51 +0300
+Subject: xfs: fix incorrect root dquot corruption error when switching group/project quota types
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, Dave Chinner <david@fromorbit.com>, "Darrick J . Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Brian Foster <bfoster@redhat.com>, Christian Brauner <brauner@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Adam Manzanares <a.manzanares@samsung.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org, Chandan Babu R <chandanrlinux@gmail.com>
+Message-ID: <20220606143255.685988-5-amir73il@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+commit 45068063efb7dd0a8d115c106aa05d9ab0946257 upstream.
+
+While writing up a regression test for broken behavior when a chprojid
+request fails, I noticed that we were logging corruption notices about
+the root dquot of the group/project quota file at mount time when
+testing V4 filesystems.
+
+In commit afeda6000b0c, I was trying to improve ondisk dquot validation
+by making sure that when we load an ondisk dquot into memory on behalf
+of an incore dquot, the dquot id and type matches.  Unfortunately, I
+forgot that V4 filesystems only have two quota files, and can switch
+that file between group and project quota types at mount time.  When we
+perform that switch, we'll try to load the default quota limits from the
+root dquot prior to running quotacheck and log a corruption error when
+the types don't match.
+
+This is inconsequential because quotacheck will reset the second quota
+file as part of doing the switch, but we shouldn't leave scary messages
+in the kernel log.
+
+Fixes: afeda6000b0c ("xfs: validate ondisk/incore dquot flags")
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_dquot.c |   39 +++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 37 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/xfs_dquot.c
++++ b/fs/xfs/xfs_dquot.c
+@@ -500,6 +500,42 @@ xfs_dquot_alloc(
+ 	return dqp;
+ }
+ 
++/* Check the ondisk dquot's id and type match what the incore dquot expects. */
++static bool
++xfs_dquot_check_type(
++	struct xfs_dquot	*dqp,
++	struct xfs_disk_dquot	*ddqp)
++{
++	uint8_t			ddqp_type;
++	uint8_t			dqp_type;
++
++	ddqp_type = ddqp->d_type & XFS_DQTYPE_REC_MASK;
++	dqp_type = xfs_dquot_type(dqp);
++
++	if (be32_to_cpu(ddqp->d_id) != dqp->q_id)
++		return false;
++
++	/*
++	 * V5 filesystems always expect an exact type match.  V4 filesystems
++	 * expect an exact match for user dquots and for non-root group and
++	 * project dquots.
++	 */
++	if (xfs_sb_version_hascrc(&dqp->q_mount->m_sb) ||
++	    dqp_type == XFS_DQTYPE_USER || dqp->q_id != 0)
++		return ddqp_type == dqp_type;
++
++	/*
++	 * V4 filesystems support either group or project quotas, but not both
++	 * at the same time.  The non-user quota file can be switched between
++	 * group and project quota uses depending on the mount options, which
++	 * means that we can encounter the other type when we try to load quota
++	 * defaults.  Quotacheck will soon reset the entire quota file
++	 * (including the root dquot) anyway, but don't log scary corruption
++	 * reports to dmesg.
++	 */
++	return ddqp_type == XFS_DQTYPE_GROUP || ddqp_type == XFS_DQTYPE_PROJ;
++}
++
+ /* Copy the in-core quota fields in from the on-disk buffer. */
+ STATIC int
+ xfs_dquot_from_disk(
+@@ -512,8 +548,7 @@ xfs_dquot_from_disk(
+ 	 * Ensure that we got the type and ID we were looking for.
+ 	 * Everything else was checked by the dquot buffer verifier.
+ 	 */
+-	if ((ddqp->d_type & XFS_DQTYPE_REC_MASK) != xfs_dquot_type(dqp) ||
+-	    be32_to_cpu(ddqp->d_id) != dqp->q_id) {
++	if (!xfs_dquot_check_type(dqp, ddqp)) {
+ 		xfs_alert_tag(bp->b_mount, XFS_PTAG_VERIFIER_ERROR,
+ 			  "Metadata corruption detected at %pS, quota %u",
+ 			  __this_address, dqp->q_id);
diff --git a/queue-5.10/xfs-force-log-and-push-ail-to-clear-pinned-inodes-when-aborting-mount.patch b/queue-5.10/xfs-force-log-and-push-ail-to-clear-pinned-inodes-when-aborting-mount.patch
new file mode 100644
index 00000000000..13e592eb754
--- /dev/null
+++ b/queue-5.10/xfs-force-log-and-push-ail-to-clear-pinned-inodes-when-aborting-mount.patch
@@ -0,0 +1,155 @@
+From foo@baz Mon Jun  6 07:00:47 PM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon,  6 Jun 2022 17:32:53 +0300
+Subject: xfs: force log and push AIL to clear pinned inodes when aborting mount
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, Dave Chinner <david@fromorbit.com>, "Darrick J . Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Brian Foster <bfoster@redhat.com>, Christian Brauner <brauner@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Adam Manzanares <a.manzanares@samsung.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org, Dave Chinner <dchinner@redhat.com>
+Message-ID: <20220606143255.685988-7-amir73il@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+commit d336f7ebc65007f5831e2297e6f3383ae8dbf8ed upstream.
+
+If we allocate quota inodes in the process of mounting a filesystem but
+then decide to abort the mount, it's possible that the quota inodes are
+sitting around pinned by the log.  Now that inode reclaim relies on the
+AIL to flush inodes, we have to force the log and push the AIL in
+between releasing the quota inodes and kicking off reclaim to tear down
+all the incore inodes.  Do this by extracting the bits we need from the
+unmount path and reusing them.  As an added bonus, failed writes during
+a failed mount will not retry forever now.
+
+This was originally found during a fuzz test of metadata directories
+(xfs/1546), but the actual symptom was that reclaim hung up on the quota
+inodes.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_mount.c |   90 +++++++++++++++++++++++++----------------------------
+ 1 file changed, 44 insertions(+), 46 deletions(-)
+
+--- a/fs/xfs/xfs_mount.c
++++ b/fs/xfs/xfs_mount.c
+@@ -632,6 +632,47 @@ xfs_check_summary_counts(
+ }
+ 
+ /*
++ * Flush and reclaim dirty inodes in preparation for unmount. Inodes and
++ * internal inode structures can be sitting in the CIL and AIL at this point,
++ * so we need to unpin them, write them back and/or reclaim them before unmount
++ * can proceed.
++ *
++ * An inode cluster that has been freed can have its buffer still pinned in
++ * memory because the transaction is still sitting in a iclog. The stale inodes
++ * on that buffer will be pinned to the buffer until the transaction hits the
++ * disk and the callbacks run. Pushing the AIL will skip the stale inodes and
++ * may never see the pinned buffer, so nothing will push out the iclog and
++ * unpin the buffer.
++ *
++ * Hence we need to force the log to unpin everything first. However, log
++ * forces don't wait for the discards they issue to complete, so we have to
++ * explicitly wait for them to complete here as well.
++ *
++ * Then we can tell the world we are unmounting so that error handling knows
++ * that the filesystem is going away and we should error out anything that we
++ * have been retrying in the background.  This will prevent never-ending
++ * retries in AIL pushing from hanging the unmount.
++ *
++ * Finally, we can push the AIL to clean all the remaining dirty objects, then
++ * reclaim the remaining inodes that are still in memory at this point in time.
++ */
++static void
++xfs_unmount_flush_inodes(
++	struct xfs_mount	*mp)
++{
++	xfs_log_force(mp, XFS_LOG_SYNC);
++	xfs_extent_busy_wait_all(mp);
++	flush_workqueue(xfs_discard_wq);
++
++	mp->m_flags |= XFS_MOUNT_UNMOUNTING;
++
++	xfs_ail_push_all_sync(mp->m_ail);
++	cancel_delayed_work_sync(&mp->m_reclaim_work);
++	xfs_reclaim_inodes(mp);
++	xfs_health_unmount(mp);
++}
++
++/*
+  * This function does the following on an initial mount of a file system:
+  *	- reads the superblock from disk and init the mount struct
+  *	- if we're a 32-bit kernel, do a size check on the superblock
+@@ -1005,7 +1046,7 @@ xfs_mountfs(
+ 	/* Clean out dquots that might be in memory after quotacheck. */
+ 	xfs_qm_unmount(mp);
+ 	/*
+-	 * Cancel all delayed reclaim work and reclaim the inodes directly.
++	 * Flush all inode reclamation work and flush the log.
+ 	 * We have to do this /after/ rtunmount and qm_unmount because those
+ 	 * two will have scheduled delayed reclaim for the rt/quota inodes.
+ 	 *
+@@ -1015,11 +1056,8 @@ xfs_mountfs(
+ 	 * qm_unmount_quotas and therefore rely on qm_unmount to release the
+ 	 * quota inodes.
+ 	 */
+-	cancel_delayed_work_sync(&mp->m_reclaim_work);
+-	xfs_reclaim_inodes(mp);
+-	xfs_health_unmount(mp);
++	xfs_unmount_flush_inodes(mp);
+  out_log_dealloc:
+-	mp->m_flags |= XFS_MOUNT_UNMOUNTING;
+ 	xfs_log_mount_cancel(mp);
+  out_fail_wait:
+ 	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
+@@ -1060,47 +1098,7 @@ xfs_unmountfs(
+ 	xfs_rtunmount_inodes(mp);
+ 	xfs_irele(mp->m_rootip);
+ 
+-	/*
+-	 * We can potentially deadlock here if we have an inode cluster
+-	 * that has been freed has its buffer still pinned in memory because
+-	 * the transaction is still sitting in a iclog. The stale inodes
+-	 * on that buffer will be pinned to the buffer until the
+-	 * transaction hits the disk and the callbacks run. Pushing the AIL will
+-	 * skip the stale inodes and may never see the pinned buffer, so
+-	 * nothing will push out the iclog and unpin the buffer. Hence we
+-	 * need to force the log here to ensure all items are flushed into the
+-	 * AIL before we go any further.
+-	 */
+-	xfs_log_force(mp, XFS_LOG_SYNC);
+-
+-	/*
+-	 * Wait for all busy extents to be freed, including completion of
+-	 * any discard operation.
+-	 */
+-	xfs_extent_busy_wait_all(mp);
+-	flush_workqueue(xfs_discard_wq);
+-
+-	/*
+-	 * We now need to tell the world we are unmounting. This will allow
+-	 * us to detect that the filesystem is going away and we should error
+-	 * out anything that we have been retrying in the background. This will
+-	 * prevent neverending retries in AIL pushing from hanging the unmount.
+-	 */
+-	mp->m_flags |= XFS_MOUNT_UNMOUNTING;
+-
+-	/*
+-	 * Flush all pending changes from the AIL.
+-	 */
+-	xfs_ail_push_all_sync(mp->m_ail);
+-
+-	/*
+-	 * Reclaim all inodes. At this point there should be no dirty inodes and
+-	 * none should be pinned or locked. Stop background inode reclaim here
+-	 * if it is still running.
+-	 */
+-	cancel_delayed_work_sync(&mp->m_reclaim_work);
+-	xfs_reclaim_inodes(mp);
+-	xfs_health_unmount(mp);
++	xfs_unmount_flush_inodes(mp);
+ 
+ 	xfs_qm_unmount(mp);
+ 
diff --git a/queue-5.10/xfs-restore-shutdown-check-in-mapped-write-fault-path.patch b/queue-5.10/xfs-restore-shutdown-check-in-mapped-write-fault-path.patch
new file mode 100644
index 00000000000..b828c2e1a9a
--- /dev/null
+++ b/queue-5.10/xfs-restore-shutdown-check-in-mapped-write-fault-path.patch
@@ -0,0 +1,53 @@
+From foo@baz Mon Jun  6 07:00:47 PM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon,  6 Jun 2022 17:32:52 +0300
+Subject: xfs: restore shutdown check in mapped write fault path
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, Dave Chinner <david@fromorbit.com>, "Darrick J . Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Brian Foster <bfoster@redhat.com>, Christian Brauner <brauner@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Adam Manzanares <a.manzanares@samsung.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org, Eric Sandeen <sandeen@redhat.com>
+Message-ID: <20220606143255.685988-6-amir73il@gmail.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit e4826691cc7e5458bcb659935d0092bcf3f08c20 upstream.
+
+XFS triggers an iomap warning in the write fault path due to a
+!PageUptodate() page if a write fault happens to occur on a page
+that recently failed writeback. The iomap writeback error handling
+code can clear the Uptodate flag if no portion of the page is
+submitted for I/O. This is reproduced by fstest generic/019, which
+combines various forms of I/O with simulated disk failures that
+inevitably lead to filesystem shutdown (which then unconditionally
+fails page writeback).
+
+This is a regression introduced by commit f150b4234397 ("xfs: split
+the iomap ops for buffered vs direct writes") due to the removal of
+a shutdown check and explicit error return in the ->iomap_begin()
+path used by the write fault path. The explicit error return
+historically translated to a SIGBUS, but now carries on with iomap
+processing where it complains about the unexpected state. Restore
+the shutdown check to xfs_buffered_write_iomap_begin() to restore
+historical behavior.
+
+Fixes: f150b4234397 ("xfs: split the iomap ops for buffered vs direct writes")
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_iomap.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/xfs/xfs_iomap.c
++++ b/fs/xfs/xfs_iomap.c
+@@ -870,6 +870,9 @@ xfs_buffered_write_iomap_begin(
+ 	int			allocfork = XFS_DATA_FORK;
+ 	int			error = 0;
+ 
++	if (XFS_FORCED_SHUTDOWN(mp))
++		return -EIO;
++
+ 	/* we can't use delayed allocations when using extent size hints */
+ 	if (xfs_get_extsz_hint(ip))
+ 		return xfs_direct_write_iomap_begin(inode, offset, count,
diff --git a/queue-5.10/xfs-set-inode-size-after-creating-symlink.patch b/queue-5.10/xfs-set-inode-size-after-creating-symlink.patch
new file mode 100644
index 00000000000..b59ed4b7c7b
--- /dev/null
+++ b/queue-5.10/xfs-set-inode-size-after-creating-symlink.patch
@@ -0,0 +1,43 @@
+From foo@baz Mon Jun  6 07:00:47 PM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon,  6 Jun 2022 17:32:48 +0300
+Subject: xfs: set inode size after creating symlink
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, Dave Chinner <david@fromorbit.com>, "Darrick J . Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Brian Foster <bfoster@redhat.com>, Christian Brauner <brauner@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Adam Manzanares <a.manzanares@samsung.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org, Jeffrey Mitchell <jeffrey.mitchell@starlab.io>
+Message-ID: <20220606143255.685988-2-amir73il@gmail.com>
+
+From: Jeffrey Mitchell <jeffrey.mitchell@starlab.io>
+
+commit 8aa921a95335d0a8c8e2be35a44467e7c91ec3e4 upstream.
+
+When XFS creates a new symlink, it writes its size to disk but not to the
+VFS inode. This causes i_size_read() to return 0 for that symlink until
+it is re-read from disk, for example when the system is rebooted.
+
+I found this inconsistency while protecting directories with eCryptFS.
+The command "stat path/to/symlink/in/ecryptfs" will report "Size: 0" if
+the symlink was created after the last reboot on an XFS root.
+
+Call i_size_write() in xfs_symlink() so the VFS inode size is set as well.
+
+Signed-off-by: Jeffrey Mitchell <jeffrey.mitchell@starlab.io>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_symlink.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/xfs/xfs_symlink.c
++++ b/fs/xfs/xfs_symlink.c
+@@ -300,6 +300,7 @@ xfs_symlink(
+ 		}
+ 		ASSERT(pathlen == 0);
+ 	}
++	i_size_write(VFS_I(ip), ip->i_d.di_size);
+ 
+ 	/*
+ 	 * Create the directory entry for the symlink.
diff --git a/queue-5.10/xfs-sync-lazy-sb-accounting-on-quiesce-of-read-only-mounts.patch b/queue-5.10/xfs-sync-lazy-sb-accounting-on-quiesce-of-read-only-mounts.patch
new file mode 100644
index 00000000000..5ccffd28ae5
--- /dev/null
+++ b/queue-5.10/xfs-sync-lazy-sb-accounting-on-quiesce-of-read-only-mounts.patch
@@ -0,0 +1,124 @@
+From foo@baz Mon Jun  6 07:00:47 PM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon,  6 Jun 2022 17:32:49 +0300
+Subject: xfs: sync lazy sb accounting on quiesce of read-only mounts
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, Dave Chinner <david@fromorbit.com>, "Darrick J . Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Brian Foster <bfoster@redhat.com>, Christian Brauner <brauner@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Adam Manzanares <a.manzanares@samsung.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org, Gao Xiang <hsiangkao@redhat.com>, Allison Henderson <allison.henderson@oracle.com>, "Darrick J . Wong" <darrick.wong@oracle.com>, Bill O'Donnell <billodo@redhat.com>
+Message-ID: <20220606143255.685988-3-amir73il@gmail.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 50d25484bebe94320c49dd1347d3330c7063bbdb upstream.
+
+xfs_log_sbcount() syncs the superblock specifically to accumulate
+the in-core percpu superblock counters and commit them to disk. This
+is required to maintain filesystem consistency across quiesce
+(freeze, read-only mount/remount) or unmount when lazy superblock
+accounting is enabled because individual transactions do not update
+the superblock directly.
+
+This mechanism works as expected for writable mounts, but
+xfs_log_sbcount() skips the update for read-only mounts. Read-only
+mounts otherwise still allow log recovery and write out an unmount
+record during log quiesce. If a read-only mount performs log
+recovery, it can modify the in-core superblock counters and write an
+unmount record when the filesystem unmounts without ever syncing the
+in-core counters. This leaves the filesystem with a clean log but in
+an inconsistent state with regard to lazy sb counters.
+
+Update xfs_log_sbcount() to use the same logic
+xfs_log_unmount_write() uses to determine when to write an unmount
+record. This ensures that lazy accounting is always synced before
+the log is cleaned. Refactor this logic into a new helper to
+distinguish between a writable filesystem and a writable log.
+Specifically, the log is writable unless the filesystem is mounted
+with the norecovery mount option, the underlying log device is
+read-only, or the filesystem is shut down. Drop the freeze state
+check because the update is already allowed during the freezing
+process and no context calls this function on an already frozen fs.
+Also, retain the shutdown check in xfs_log_unmount_write() to catch
+the case where the preceding log force might have triggered a
+shutdown.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Gao Xiang <hsiangkao@redhat.com>
+Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Bill O'Donnell <billodo@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_log.c   |   28 ++++++++++++++++++++--------
+ fs/xfs/xfs_log.h   |    1 +
+ fs/xfs/xfs_mount.c |    3 +--
+ 3 files changed, 22 insertions(+), 10 deletions(-)
+
+--- a/fs/xfs/xfs_log.c
++++ b/fs/xfs/xfs_log.c
+@@ -347,6 +347,25 @@ xlog_tic_add_region(xlog_ticket_t *tic,
+ 	tic->t_res_num++;
+ }
+ 
++bool
++xfs_log_writable(
++	struct xfs_mount	*mp)
++{
++	/*
++	 * Never write to the log on norecovery mounts, if the block device is
++	 * read-only, or if the filesystem is shutdown. Read-only mounts still
++	 * allow internal writes for log recovery and unmount purposes, so don't
++	 * restrict that case here.
++	 */
++	if (mp->m_flags & XFS_MOUNT_NORECOVERY)
++		return false;
++	if (xfs_readonly_buftarg(mp->m_log->l_targ))
++		return false;
++	if (XFS_FORCED_SHUTDOWN(mp))
++		return false;
++	return true;
++}
++
+ /*
+  * Replenish the byte reservation required by moving the grant write head.
+  */
+@@ -886,15 +905,8 @@ xfs_log_unmount_write(
+ {
+ 	struct xlog		*log = mp->m_log;
+ 
+-	/*
+-	 * Don't write out unmount record on norecovery mounts or ro devices.
+-	 * Or, if we are doing a forced umount (typically because of IO errors).
+-	 */
+-	if (mp->m_flags & XFS_MOUNT_NORECOVERY ||
+-	    xfs_readonly_buftarg(log->l_targ)) {
+-		ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
++	if (!xfs_log_writable(mp))
+ 		return;
+-	}
+ 
+ 	xfs_log_force(mp, XFS_LOG_SYNC);
+ 
+--- a/fs/xfs/xfs_log.h
++++ b/fs/xfs/xfs_log.h
+@@ -127,6 +127,7 @@ int	  xfs_log_reserve(struct xfs_mount *
+ int	  xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
+ void      xfs_log_unmount(struct xfs_mount *mp);
+ int	  xfs_log_force_umount(struct xfs_mount *mp, int logerror);
++bool	xfs_log_writable(struct xfs_mount *mp);
+ 
+ struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
+ void	  xfs_log_ticket_put(struct xlog_ticket *ticket);
+--- a/fs/xfs/xfs_mount.c
++++ b/fs/xfs/xfs_mount.c
+@@ -1176,8 +1176,7 @@ xfs_fs_writable(
+ int
+ xfs_log_sbcount(xfs_mount_t *mp)
+ {
+-	/* allow this to proceed during the freeze sequence... */
+-	if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
++	if (!xfs_log_writable(mp))
+ 		return 0;
+ 
+ 	/*
-- 
2.47.3