git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.10-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 6 Jun 2022 17:02:20 +0000 (19:02 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 6 Jun 2022 17:02:20 +0000 (19:02 +0200)
added patches:
net-ipa-fix-page-free-in-ipa_endpoint_replenish_one.patch
net-ipa-fix-page-free-in-ipa_endpoint_trans_release.patch
xfs-assert-in-xfs_btree_del_cursor-should-take-into-account-error.patch
xfs-consider-shutdown-in-bmapbt-cursor-delete-assert.patch
xfs-fix-chown-leaking-delalloc-quota-blocks-when-fssetxattr-fails.patch
xfs-fix-incorrect-root-dquot-corruption-error-when-switching-group-project-quota-types.patch
xfs-force-log-and-push-ail-to-clear-pinned-inodes-when-aborting-mount.patch
xfs-restore-shutdown-check-in-mapped-write-fault-path.patch
xfs-set-inode-size-after-creating-symlink.patch
xfs-sync-lazy-sb-accounting-on-quiesce-of-read-only-mounts.patch

queue-5.10/net-ipa-fix-page-free-in-ipa_endpoint_replenish_one.patch [new file with mode: 0644]
queue-5.10/net-ipa-fix-page-free-in-ipa_endpoint_trans_release.patch [new file with mode: 0644]
queue-5.10/series
queue-5.10/xfs-assert-in-xfs_btree_del_cursor-should-take-into-account-error.patch [new file with mode: 0644]
queue-5.10/xfs-consider-shutdown-in-bmapbt-cursor-delete-assert.patch [new file with mode: 0644]
queue-5.10/xfs-fix-chown-leaking-delalloc-quota-blocks-when-fssetxattr-fails.patch [new file with mode: 0644]
queue-5.10/xfs-fix-incorrect-root-dquot-corruption-error-when-switching-group-project-quota-types.patch [new file with mode: 0644]
queue-5.10/xfs-force-log-and-push-ail-to-clear-pinned-inodes-when-aborting-mount.patch [new file with mode: 0644]
queue-5.10/xfs-restore-shutdown-check-in-mapped-write-fault-path.patch [new file with mode: 0644]
queue-5.10/xfs-set-inode-size-after-creating-symlink.patch [new file with mode: 0644]
queue-5.10/xfs-sync-lazy-sb-accounting-on-quiesce-of-read-only-mounts.patch [new file with mode: 0644]

diff --git a/queue-5.10/net-ipa-fix-page-free-in-ipa_endpoint_replenish_one.patch b/queue-5.10/net-ipa-fix-page-free-in-ipa_endpoint_replenish_one.patch
new file mode 100644 (file)
index 0000000..f160cd7
--- /dev/null
@@ -0,0 +1,38 @@
+From 70132763d5d2e94cd185e3aa92ac6a3ba89068fa Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@linaro.org>
+Date: Thu, 26 May 2022 10:23:14 -0500
+Subject: net: ipa: fix page free in ipa_endpoint_replenish_one()
+
+From: Alex Elder <elder@linaro.org>
+
+commit 70132763d5d2e94cd185e3aa92ac6a3ba89068fa upstream.
+
+Currently the (possibly compound) pages used for receive buffers are
+freed using __free_pages().  But according to this comment above the
+definition of that function, that's wrong:
+    If you want to use the page's reference count to decide
+    when to free the allocation, you should allocate a compound
+    page, and use put_page() instead of __free_pages().
+
+Convert the call to __free_pages() in ipa_endpoint_replenish_one()
+to use put_page() instead.
+
+Fixes: 6a606b90153b8 ("net: ipa: allocate transaction in replenish loop")
+Signed-off-by: Alex Elder <elder@linaro.org>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ipa/ipa_endpoint.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ipa/ipa_endpoint.c
++++ b/drivers/net/ipa/ipa_endpoint.c
+@@ -884,7 +884,7 @@ static int ipa_endpoint_replenish_one(st
+ err_trans_free:
+       gsi_trans_free(trans);
+ err_free_pages:
+-      __free_pages(page, get_order(IPA_RX_BUFFER_SIZE));
++      put_page(page);
+       return -ENOMEM;
+ }
diff --git a/queue-5.10/net-ipa-fix-page-free-in-ipa_endpoint_trans_release.patch b/queue-5.10/net-ipa-fix-page-free-in-ipa_endpoint_trans_release.patch
new file mode 100644 (file)
index 0000000..adbd982
--- /dev/null
@@ -0,0 +1,38 @@
+From 155c0c90bca918de6e4327275dfc1d97fd604115 Mon Sep 17 00:00:00 2001
+From: Alex Elder <elder@linaro.org>
+Date: Thu, 26 May 2022 10:23:13 -0500
+Subject: net: ipa: fix page free in ipa_endpoint_trans_release()
+
+From: Alex Elder <elder@linaro.org>
+
+commit 155c0c90bca918de6e4327275dfc1d97fd604115 upstream.
+
+Currently the (possibly compound) pages used for receive buffers are
+freed using __free_pages().  But according to this comment above the
+definition of that function, that's wrong:
+    If you want to use the page's reference count to decide when
+    to free the allocation, you should allocate a compound page,
+    and use put_page() instead of __free_pages().
+
+Convert the call to __free_pages() in ipa_endpoint_trans_release()
+to use put_page() instead.
+
+Fixes: ed23f02680caa ("net: ipa: define per-endpoint receive buffer size")
+Signed-off-by: Alex Elder <elder@linaro.org>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ipa/ipa_endpoint.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ipa/ipa_endpoint.c
++++ b/drivers/net/ipa/ipa_endpoint.c
+@@ -1179,7 +1179,7 @@ void ipa_endpoint_trans_release(struct i
+               struct page *page = trans->data;
+               if (page)
+-                      __free_pages(page, get_order(IPA_RX_BUFFER_SIZE));
++                      put_page(page);
+       }
+ }
index 6ca435b189798948cc61c297193a053018e013a0..9f1f357df0d9b9e02017c4e138d3b18ccd025133 100644 (file)
@@ -425,3 +425,13 @@ vdpasim-allow-to-enable-a-vq-repeatedly.patch
 blk-iolatency-fix-inflight-count-imbalances-and-io-hangs-on-offline.patch
 coresight-core-fix-coresight-device-probe-failure-issue.patch
 phy-qcom-qmp-fix-reset-controller-leak-on-probe-errors.patch
+net-ipa-fix-page-free-in-ipa_endpoint_trans_release.patch
+net-ipa-fix-page-free-in-ipa_endpoint_replenish_one.patch
+xfs-set-inode-size-after-creating-symlink.patch
+xfs-sync-lazy-sb-accounting-on-quiesce-of-read-only-mounts.patch
+xfs-fix-chown-leaking-delalloc-quota-blocks-when-fssetxattr-fails.patch
+xfs-fix-incorrect-root-dquot-corruption-error-when-switching-group-project-quota-types.patch
+xfs-restore-shutdown-check-in-mapped-write-fault-path.patch
+xfs-force-log-and-push-ail-to-clear-pinned-inodes-when-aborting-mount.patch
+xfs-consider-shutdown-in-bmapbt-cursor-delete-assert.patch
+xfs-assert-in-xfs_btree_del_cursor-should-take-into-account-error.patch
diff --git a/queue-5.10/xfs-assert-in-xfs_btree_del_cursor-should-take-into-account-error.patch b/queue-5.10/xfs-assert-in-xfs_btree_del_cursor-should-take-into-account-error.patch
new file mode 100644 (file)
index 0000000..58d90ff
--- /dev/null
@@ -0,0 +1,82 @@
+From foo@baz Mon Jun  6 07:00:47 PM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon,  6 Jun 2022 17:32:55 +0300
+Subject: xfs: assert in xfs_btree_del_cursor should take into account error
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, Dave Chinner <david@fromorbit.com>, "Darrick J . Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Brian Foster <bfoster@redhat.com>, Christian Brauner <brauner@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Adam Manzanares <a.manzanares@samsung.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org, Dave Chinner <dchinner@redhat.com>
+Message-ID: <20220606143255.685988-9-amir73il@gmail.com>
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 56486f307100e8fc66efa2ebd8a71941fa10bf6f upstream.
+
+xfs/538 on a 1kB block filesystem failed with this assert:
+
+XFS: Assertion failed: cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 || xfs_is_shutdown(cur->bc_mp), file: fs/xfs/libxfs/xfs_btree.c, line: 448
+
+The problem was that an allocation failed unexpectedly in
+xfs_bmbt_alloc_block() after roughly 150,000 minlen allocation error
+injections, resulting in an EFSCORRUPTED error being returned to
+xfs_bmapi_write(). The error occurred on extent-to-btree format
+conversion allocating the new root block:
+
+ RIP: 0010:xfs_bmbt_alloc_block+0x177/0x210
+ Call Trace:
+  <TASK>
+  xfs_btree_new_iroot+0xdf/0x520
+  xfs_btree_make_block_unfull+0x10d/0x1c0
+  xfs_btree_insrec+0x364/0x790
+  xfs_btree_insert+0xaa/0x210
+  xfs_bmap_add_extent_hole_real+0x1fe/0x9a0
+  xfs_bmapi_allocate+0x34c/0x420
+  xfs_bmapi_write+0x53c/0x9c0
+  xfs_alloc_file_space+0xee/0x320
+  xfs_file_fallocate+0x36b/0x450
+  vfs_fallocate+0x148/0x340
+  __x64_sys_fallocate+0x3c/0x70
+  do_syscall_64+0x35/0x80
+  entry_SYSCALL_64_after_hwframe+0x44/0xa
+
+Why the allocation failed at this point is unknown, but it is likely
+that we ran the transaction out of reserved space and the filesystem out
+of space with bmbt blocks because of all the minlen allocations
+being done causing worst case fragmentation of a large allocation.
+
+Regardless of the cause, we've then called xfs_bmapi_finish() which
+calls xfs_btree_del_cursor(cur, error) to tear down the cursor.
+
+So we have a failed operation, error != 0, cur->bc_ino.allocated > 0
+and the filesystem is still up. The assert fails to take into
+account that allocation can fail with an error and the transaction
+teardown will shut the filesystem down if necessary. i.e. the
+assert needs to check "|| error != 0" as well, because at this point
+shutdown is pending because the current transaction is dirty....
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_btree.c |    8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/fs/xfs/libxfs/xfs_btree.c
++++ b/fs/xfs/libxfs/xfs_btree.c
+@@ -372,8 +372,14 @@ xfs_btree_del_cursor(
+                       break;
+       }
++      /*
++       * If we are doing a BMBT update, the number of unaccounted blocks
++       * allocated during this cursor life time should be zero. If it's not
++       * zero, then we should be shut down or on our way to shutdown due to
++       * cancelling a dirty transaction on error.
++       */
+       ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 ||
+-             XFS_FORCED_SHUTDOWN(cur->bc_mp));
++             XFS_FORCED_SHUTDOWN(cur->bc_mp) || error != 0);
+       if (unlikely(cur->bc_flags & XFS_BTREE_STAGING))
+               kmem_free(cur->bc_ops);
+       kmem_cache_free(xfs_btree_cur_zone, cur);
diff --git a/queue-5.10/xfs-consider-shutdown-in-bmapbt-cursor-delete-assert.patch b/queue-5.10/xfs-consider-shutdown-in-bmapbt-cursor-delete-assert.patch
new file mode 100644 (file)
index 0000000..7eafcf4
--- /dev/null
@@ -0,0 +1,88 @@
+From foo@baz Mon Jun  6 07:00:47 PM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon,  6 Jun 2022 17:32:54 +0300
+Subject: xfs: consider shutdown in bmapbt cursor delete assert
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, Dave Chinner <david@fromorbit.com>, "Darrick J . Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Brian Foster <bfoster@redhat.com>, Christian Brauner <brauner@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Adam Manzanares <a.manzanares@samsung.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org
+Message-ID: <20220606143255.685988-8-amir73il@gmail.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 1cd738b13ae9b29e03d6149f0246c61f76e81fcf upstream.
+
+The assert in xfs_btree_del_cursor() checks that the bmapbt block
+allocation field has been handled correctly before the cursor is
+freed. This field is used for accurate calculation of indirect block
+reservation requirements (for delayed allocations), for example.
+generic/019 reproduces a scenario where this assert fails because
+the filesystem has shut down while in the middle of a bmbt record
+insertion. This occurs after a bmbt block has been allocated via the
+cursor but before the higher level bmap function (i.e.
+xfs_bmap_add_extent_hole_real()) completes and resets the field.
+
+Update the assert to accommodate the transient state if the
+filesystem has shut down. While here, clean up the indentation and
+comments in the function.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_btree.c |   33 ++++++++++++---------------------
+ 1 file changed, 12 insertions(+), 21 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_btree.c
++++ b/fs/xfs/libxfs/xfs_btree.c
+@@ -353,20 +353,17 @@ xfs_btree_free_block(
+  */
+ void
+ xfs_btree_del_cursor(
+-      xfs_btree_cur_t *cur,           /* btree cursor */
+-      int             error)          /* del because of error */
++      struct xfs_btree_cur    *cur,           /* btree cursor */
++      int                     error)          /* del because of error */
+ {
+-      int             i;              /* btree level */
++      int                     i;              /* btree level */
+       /*
+-       * Clear the buffer pointers, and release the buffers.
+-       * If we're doing this in the face of an error, we
+-       * need to make sure to inspect all of the entries
+-       * in the bc_bufs array for buffers to be unlocked.
+-       * This is because some of the btree code works from
+-       * level n down to 0, and if we get an error along
+-       * the way we won't have initialized all the entries
+-       * down to 0.
++       * Clear the buffer pointers and release the buffers. If we're doing
++       * this because of an error, inspect all of the entries in the bc_bufs
++       * array for buffers to be unlocked. This is because some of the btree
++       * code works from level n down to 0, and if we get an error along the
++       * way we won't have initialized all the entries down to 0.
+        */
+       for (i = 0; i < cur->bc_nlevels; i++) {
+               if (cur->bc_bufs[i])
+@@ -374,17 +371,11 @@ xfs_btree_del_cursor(
+               else if (!error)
+                       break;
+       }
+-      /*
+-       * Can't free a bmap cursor without having dealt with the
+-       * allocated indirect blocks' accounting.
+-       */
+-      ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
+-             cur->bc_ino.allocated == 0);
+-      /*
+-       * Free the cursor.
+-       */
++
++      ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 ||
++             XFS_FORCED_SHUTDOWN(cur->bc_mp));
+       if (unlikely(cur->bc_flags & XFS_BTREE_STAGING))
+-              kmem_free((void *)cur->bc_ops);
++              kmem_free(cur->bc_ops);
+       kmem_cache_free(xfs_btree_cur_zone, cur);
+ }
diff --git a/queue-5.10/xfs-fix-chown-leaking-delalloc-quota-blocks-when-fssetxattr-fails.patch b/queue-5.10/xfs-fix-chown-leaking-delalloc-quota-blocks-when-fssetxattr-fails.patch
new file mode 100644 (file)
index 0000000..1597f94
--- /dev/null
@@ -0,0 +1,170 @@
+From foo@baz Mon Jun  6 07:00:47 PM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon,  6 Jun 2022 17:32:50 +0300
+Subject: xfs: fix chown leaking delalloc quota blocks when fssetxattr fails
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, Dave Chinner <david@fromorbit.com>, "Darrick J . Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Brian Foster <bfoster@redhat.com>, Christian Brauner <brauner@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Adam Manzanares <a.manzanares@samsung.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org
+Message-ID: <20220606143255.685988-4-amir73il@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+commit 1aecf3734a95f3c167d1495550ca57556d33f7ec upstream.
+
+While refactoring the quota code to create a function to allocate inode
+change transactions, I noticed that xfs_qm_vop_chown_reserve does more
+than just make reservations: it also *modifies* the incore counts
+directly to handle the owner id change for the delalloc blocks.
+
+I then observed that the fssetxattr code continues validating input
+arguments after making the quota reservation but before dirtying the
+transaction.  If the routine decides to error out, it fails to undo the
+accounting switch!  This leads to incorrect quota reservation and
+failure down the line.
+
+We can fix this by making the reservation function do only that -- for
+the new dquot, it reserves ondisk and delalloc blocks to the
+transaction, and the old dquot hangs on to its incore reservation for
+now.  Once we actually switch the dquots, we can then update the incore
+reservations because we've dirtied the transaction and it's too late to
+turn back now.
+
+No fixes tag because this has been broken since the start of git.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_qm.c |   92 +++++++++++++++++++++-----------------------------------
+ 1 file changed, 35 insertions(+), 57 deletions(-)
+
+--- a/fs/xfs/xfs_qm.c
++++ b/fs/xfs/xfs_qm.c
+@@ -1786,6 +1786,29 @@ xfs_qm_vop_chown(
+       xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
+       /*
++       * Back when we made quota reservations for the chown, we reserved the
++       * ondisk blocks + delalloc blocks with the new dquot.  Now that we've
++       * switched the dquots, decrease the new dquot's block reservation
++       * (having already bumped up the real counter) so that we don't have
++       * any reservation to give back when we commit.
++       */
++      xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_RES_BLKS,
++                      -ip->i_delayed_blks);
++
++      /*
++       * Give the incore reservation for delalloc blocks back to the old
++       * dquot.  We don't normally handle delalloc quota reservations
++       * transactionally, so just lock the dquot and subtract from the
++       * reservation.  Dirty the transaction because it's too late to turn
++       * back now.
++       */
++      tp->t_flags |= XFS_TRANS_DIRTY;
++      xfs_dqlock(prevdq);
++      ASSERT(prevdq->q_blk.reserved >= ip->i_delayed_blks);
++      prevdq->q_blk.reserved -= ip->i_delayed_blks;
++      xfs_dqunlock(prevdq);
++
++      /*
+        * Take an extra reference, because the inode is going to keep
+        * this dquot pointer even after the trans_commit.
+        */
+@@ -1807,84 +1830,39 @@ xfs_qm_vop_chown_reserve(
+       uint                    flags)
+ {
+       struct xfs_mount        *mp = ip->i_mount;
+-      uint64_t                delblks;
+       unsigned int            blkflags;
+-      struct xfs_dquot        *udq_unres = NULL;
+-      struct xfs_dquot        *gdq_unres = NULL;
+-      struct xfs_dquot        *pdq_unres = NULL;
+       struct xfs_dquot        *udq_delblks = NULL;
+       struct xfs_dquot        *gdq_delblks = NULL;
+       struct xfs_dquot        *pdq_delblks = NULL;
+-      int                     error;
+-
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
+       ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+-      delblks = ip->i_delayed_blks;
+       blkflags = XFS_IS_REALTIME_INODE(ip) ?
+                       XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
+       if (XFS_IS_UQUOTA_ON(mp) && udqp &&
+-          i_uid_read(VFS_I(ip)) != udqp->q_id) {
++          i_uid_read(VFS_I(ip)) != udqp->q_id)
+               udq_delblks = udqp;
+-              /*
+-               * If there are delayed allocation blocks, then we have to
+-               * unreserve those from the old dquot, and add them to the
+-               * new dquot.
+-               */
+-              if (delblks) {
+-                      ASSERT(ip->i_udquot);
+-                      udq_unres = ip->i_udquot;
+-              }
+-      }
++
+       if (XFS_IS_GQUOTA_ON(ip->i_mount) && gdqp &&
+-          i_gid_read(VFS_I(ip)) != gdqp->q_id) {
++          i_gid_read(VFS_I(ip)) != gdqp->q_id)
+               gdq_delblks = gdqp;
+-              if (delblks) {
+-                      ASSERT(ip->i_gdquot);
+-                      gdq_unres = ip->i_gdquot;
+-              }
+-      }
+       if (XFS_IS_PQUOTA_ON(ip->i_mount) && pdqp &&
+-          ip->i_d.di_projid != pdqp->q_id) {
++          ip->i_d.di_projid != pdqp->q_id)
+               pdq_delblks = pdqp;
+-              if (delblks) {
+-                      ASSERT(ip->i_pdquot);
+-                      pdq_unres = ip->i_pdquot;
+-              }
+-      }
+-
+-      error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
+-                              udq_delblks, gdq_delblks, pdq_delblks,
+-                              ip->i_d.di_nblocks, 1, flags | blkflags);
+-      if (error)
+-              return error;
+       /*
+-       * Do the delayed blks reservations/unreservations now. Since, these
+-       * are done without the help of a transaction, if a reservation fails
+-       * its previous reservations won't be automatically undone by trans
+-       * code. So, we have to do it manually here.
++       * Reserve enough quota to handle blocks on disk and reserved for a
++       * delayed allocation.  We'll actually transfer the delalloc
++       * reservation between dquots at chown time, even though that part is
++       * only semi-transactional.
+        */
+-      if (delblks) {
+-              /*
+-               * Do the reservations first. Unreservation can't fail.
+-               */
+-              ASSERT(udq_delblks || gdq_delblks || pdq_delblks);
+-              ASSERT(udq_unres || gdq_unres || pdq_unres);
+-              error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
+-                          udq_delblks, gdq_delblks, pdq_delblks,
+-                          (xfs_qcnt_t)delblks, 0, flags | blkflags);
+-              if (error)
+-                      return error;
+-              xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
+-                              udq_unres, gdq_unres, pdq_unres,
+-                              -((xfs_qcnt_t)delblks), 0, blkflags);
+-      }
+-
+-      return 0;
++      return xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, udq_delblks,
++                      gdq_delblks, pdq_delblks,
++                      ip->i_d.di_nblocks + ip->i_delayed_blks,
++                      1, blkflags | flags);
+ }
+ int
diff --git a/queue-5.10/xfs-fix-incorrect-root-dquot-corruption-error-when-switching-group-project-quota-types.patch b/queue-5.10/xfs-fix-incorrect-root-dquot-corruption-error-when-switching-group-project-quota-types.patch
new file mode 100644 (file)
index 0000000..b04b4fd
--- /dev/null
@@ -0,0 +1,95 @@
+From foo@baz Mon Jun  6 07:00:47 PM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon,  6 Jun 2022 17:32:51 +0300
+Subject: xfs: fix incorrect root dquot corruption error when switching group/project quota types
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, Dave Chinner <david@fromorbit.com>, "Darrick J . Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Brian Foster <bfoster@redhat.com>, Christian Brauner <brauner@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Adam Manzanares <a.manzanares@samsung.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org, Chandan Babu R <chandanrlinux@gmail.com>
+Message-ID: <20220606143255.685988-5-amir73il@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+commit 45068063efb7dd0a8d115c106aa05d9ab0946257 upstream.
+
+While writing up a regression test for broken behavior when a chprojid
+request fails, I noticed that we were logging corruption notices about
+the root dquot of the group/project quota file at mount time when
+testing V4 filesystems.
+
+In commit afeda6000b0c, I was trying to improve ondisk dquot validation
+by making sure that when we load an ondisk dquot into memory on behalf
+of an incore dquot, the dquot id and type matches.  Unfortunately, I
+forgot that V4 filesystems only have two quota files, and can switch
+that file between group and project quota types at mount time.  When we
+perform that switch, we'll try to load the default quota limits from the
+root dquot prior to running quotacheck and log a corruption error when
+the types don't match.
+
+This is inconsequential because quotacheck will reset the second quota
+file as part of doing the switch, but we shouldn't leave scary messages
+in the kernel log.
+
+Fixes: afeda6000b0c ("xfs: validate ondisk/incore dquot flags")
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_dquot.c |   39 +++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 37 insertions(+), 2 deletions(-)
+
+--- a/fs/xfs/xfs_dquot.c
++++ b/fs/xfs/xfs_dquot.c
+@@ -500,6 +500,42 @@ xfs_dquot_alloc(
+       return dqp;
+ }
++/* Check the ondisk dquot's id and type match what the incore dquot expects. */
++static bool
++xfs_dquot_check_type(
++      struct xfs_dquot        *dqp,
++      struct xfs_disk_dquot   *ddqp)
++{
++      uint8_t                 ddqp_type;
++      uint8_t                 dqp_type;
++
++      ddqp_type = ddqp->d_type & XFS_DQTYPE_REC_MASK;
++      dqp_type = xfs_dquot_type(dqp);
++
++      if (be32_to_cpu(ddqp->d_id) != dqp->q_id)
++              return false;
++
++      /*
++       * V5 filesystems always expect an exact type match.  V4 filesystems
++       * expect an exact match for user dquots and for non-root group and
++       * project dquots.
++       */
++      if (xfs_sb_version_hascrc(&dqp->q_mount->m_sb) ||
++          dqp_type == XFS_DQTYPE_USER || dqp->q_id != 0)
++              return ddqp_type == dqp_type;
++
++      /*
++       * V4 filesystems support either group or project quotas, but not both
++       * at the same time.  The non-user quota file can be switched between
++       * group and project quota uses depending on the mount options, which
++       * means that we can encounter the other type when we try to load quota
+       * defaults.  Quotacheck will soon reset the entire quota file
++       * (including the root dquot) anyway, but don't log scary corruption
++       * reports to dmesg.
++       */
++      return ddqp_type == XFS_DQTYPE_GROUP || ddqp_type == XFS_DQTYPE_PROJ;
++}
++
+ /* Copy the in-core quota fields in from the on-disk buffer. */
+ STATIC int
+ xfs_dquot_from_disk(
+@@ -512,8 +548,7 @@ xfs_dquot_from_disk(
+        * Ensure that we got the type and ID we were looking for.
+        * Everything else was checked by the dquot buffer verifier.
+        */
+-      if ((ddqp->d_type & XFS_DQTYPE_REC_MASK) != xfs_dquot_type(dqp) ||
+-          be32_to_cpu(ddqp->d_id) != dqp->q_id) {
++      if (!xfs_dquot_check_type(dqp, ddqp)) {
+               xfs_alert_tag(bp->b_mount, XFS_PTAG_VERIFIER_ERROR,
+                         "Metadata corruption detected at %pS, quota %u",
+                         __this_address, dqp->q_id);
diff --git a/queue-5.10/xfs-force-log-and-push-ail-to-clear-pinned-inodes-when-aborting-mount.patch b/queue-5.10/xfs-force-log-and-push-ail-to-clear-pinned-inodes-when-aborting-mount.patch
new file mode 100644 (file)
index 0000000..13e592e
--- /dev/null
@@ -0,0 +1,155 @@
+From foo@baz Mon Jun  6 07:00:47 PM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon,  6 Jun 2022 17:32:53 +0300
+Subject: xfs: force log and push AIL to clear pinned inodes when aborting mount
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, Dave Chinner <david@fromorbit.com>, "Darrick J . Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Brian Foster <bfoster@redhat.com>, Christian Brauner <brauner@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Adam Manzanares <a.manzanares@samsung.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org, Dave Chinner <dchinner@redhat.com>
+Message-ID: <20220606143255.685988-7-amir73il@gmail.com>
+
+From: "Darrick J. Wong" <djwong@kernel.org>
+
+commit d336f7ebc65007f5831e2297e6f3383ae8dbf8ed upstream.
+
+If we allocate quota inodes in the process of mounting a filesystem but
+then decide to abort the mount, it's possible that the quota inodes are
+sitting around pinned by the log.  Now that inode reclaim relies on the
+AIL to flush inodes, we have to force the log and push the AIL in
+between releasing the quota inodes and kicking off reclaim to tear down
+all the incore inodes.  Do this by extracting the bits we need from the
+unmount path and reusing them.  As an added bonus, failed writes during
+a failed mount will not retry forever now.
+
+This was originally found during a fuzz test of metadata directories
+(xfs/1546), but the actual symptom was that reclaim hung up on the quota
+inodes.
+
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Dave Chinner <dchinner@redhat.com>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_mount.c |   90 +++++++++++++++++++++++++----------------------------
+ 1 file changed, 44 insertions(+), 46 deletions(-)
+
+--- a/fs/xfs/xfs_mount.c
++++ b/fs/xfs/xfs_mount.c
+@@ -632,6 +632,47 @@ xfs_check_summary_counts(
+ }
+ /*
++ * Flush and reclaim dirty inodes in preparation for unmount. Inodes and
++ * internal inode structures can be sitting in the CIL and AIL at this point,
++ * so we need to unpin them, write them back and/or reclaim them before unmount
++ * can proceed.
++ *
++ * An inode cluster that has been freed can have its buffer still pinned in
+ * memory because the transaction is still sitting in an iclog. The stale inodes
++ * on that buffer will be pinned to the buffer until the transaction hits the
++ * disk and the callbacks run. Pushing the AIL will skip the stale inodes and
++ * may never see the pinned buffer, so nothing will push out the iclog and
++ * unpin the buffer.
++ *
++ * Hence we need to force the log to unpin everything first. However, log
++ * forces don't wait for the discards they issue to complete, so we have to
++ * explicitly wait for them to complete here as well.
++ *
++ * Then we can tell the world we are unmounting so that error handling knows
++ * that the filesystem is going away and we should error out anything that we
++ * have been retrying in the background.  This will prevent never-ending
++ * retries in AIL pushing from hanging the unmount.
++ *
++ * Finally, we can push the AIL to clean all the remaining dirty objects, then
++ * reclaim the remaining inodes that are still in memory at this point in time.
++ */
++static void
++xfs_unmount_flush_inodes(
++      struct xfs_mount        *mp)
++{
++      xfs_log_force(mp, XFS_LOG_SYNC);
++      xfs_extent_busy_wait_all(mp);
++      flush_workqueue(xfs_discard_wq);
++
++      mp->m_flags |= XFS_MOUNT_UNMOUNTING;
++
++      xfs_ail_push_all_sync(mp->m_ail);
++      cancel_delayed_work_sync(&mp->m_reclaim_work);
++      xfs_reclaim_inodes(mp);
++      xfs_health_unmount(mp);
++}
++
++/*
+  * This function does the following on an initial mount of a file system:
+  *    - reads the superblock from disk and init the mount struct
+  *    - if we're a 32-bit kernel, do a size check on the superblock
+@@ -1005,7 +1046,7 @@ xfs_mountfs(
+       /* Clean out dquots that might be in memory after quotacheck. */
+       xfs_qm_unmount(mp);
+       /*
+-       * Cancel all delayed reclaim work and reclaim the inodes directly.
++       * Flush all inode reclamation work and flush the log.
+        * We have to do this /after/ rtunmount and qm_unmount because those
+        * two will have scheduled delayed reclaim for the rt/quota inodes.
+        *
+@@ -1015,11 +1056,8 @@ xfs_mountfs(
+        * qm_unmount_quotas and therefore rely on qm_unmount to release the
+        * quota inodes.
+        */
+-      cancel_delayed_work_sync(&mp->m_reclaim_work);
+-      xfs_reclaim_inodes(mp);
+-      xfs_health_unmount(mp);
++      xfs_unmount_flush_inodes(mp);
+  out_log_dealloc:
+-      mp->m_flags |= XFS_MOUNT_UNMOUNTING;
+       xfs_log_mount_cancel(mp);
+  out_fail_wait:
+       if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
+@@ -1060,47 +1098,7 @@ xfs_unmountfs(
+       xfs_rtunmount_inodes(mp);
+       xfs_irele(mp->m_rootip);
+-      /*
+-       * We can potentially deadlock here if we have an inode cluster
+-       * that has been freed has its buffer still pinned in memory because
+-       * the transaction is still sitting in a iclog. The stale inodes
+-       * on that buffer will be pinned to the buffer until the
+-       * transaction hits the disk and the callbacks run. Pushing the AIL will
+-       * skip the stale inodes and may never see the pinned buffer, so
+-       * nothing will push out the iclog and unpin the buffer. Hence we
+-       * need to force the log here to ensure all items are flushed into the
+-       * AIL before we go any further.
+-       */
+-      xfs_log_force(mp, XFS_LOG_SYNC);
+-
+-      /*
+-       * Wait for all busy extents to be freed, including completion of
+-       * any discard operation.
+-       */
+-      xfs_extent_busy_wait_all(mp);
+-      flush_workqueue(xfs_discard_wq);
+-
+-      /*
+-       * We now need to tell the world we are unmounting. This will allow
+-       * us to detect that the filesystem is going away and we should error
+-       * out anything that we have been retrying in the background. This will
+-       * prevent neverending retries in AIL pushing from hanging the unmount.
+-       */
+-      mp->m_flags |= XFS_MOUNT_UNMOUNTING;
+-
+-      /*
+-       * Flush all pending changes from the AIL.
+-       */
+-      xfs_ail_push_all_sync(mp->m_ail);
+-
+-      /*
+-       * Reclaim all inodes. At this point there should be no dirty inodes and
+-       * none should be pinned or locked. Stop background inode reclaim here
+-       * if it is still running.
+-       */
+-      cancel_delayed_work_sync(&mp->m_reclaim_work);
+-      xfs_reclaim_inodes(mp);
+-      xfs_health_unmount(mp);
++      xfs_unmount_flush_inodes(mp);
+       xfs_qm_unmount(mp);
diff --git a/queue-5.10/xfs-restore-shutdown-check-in-mapped-write-fault-path.patch b/queue-5.10/xfs-restore-shutdown-check-in-mapped-write-fault-path.patch
new file mode 100644 (file)
index 0000000..b828c2e
--- /dev/null
@@ -0,0 +1,53 @@
+From foo@baz Mon Jun  6 07:00:47 PM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon,  6 Jun 2022 17:32:52 +0300
+Subject: xfs: restore shutdown check in mapped write fault path
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, Dave Chinner <david@fromorbit.com>, "Darrick J . Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Brian Foster <bfoster@redhat.com>, Christian Brauner <brauner@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Adam Manzanares <a.manzanares@samsung.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org, Eric Sandeen <sandeen@redhat.com>
+Message-ID: <20220606143255.685988-6-amir73il@gmail.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit e4826691cc7e5458bcb659935d0092bcf3f08c20 upstream.
+
+XFS triggers an iomap warning in the write fault path due to a
+!PageUptodate() page if a write fault happens to occur on a page
+that recently failed writeback. The iomap writeback error handling
+code can clear the Uptodate flag if no portion of the page is
+submitted for I/O. This is reproduced by fstest generic/019, which
+combines various forms of I/O with simulated disk failures that
+inevitably lead to filesystem shutdown (which then unconditionally
+fails page writeback).
+
+This is a regression introduced by commit f150b4234397 ("xfs: split
+the iomap ops for buffered vs direct writes") due to the removal of
+a shutdown check and explicit error return in the ->iomap_begin()
+path used by the write fault path. The explicit error return
+historically translated to a SIGBUS, but now carries on with iomap
+processing where it complains about the unexpected state. Restore
+the shutdown check to xfs_buffered_write_iomap_begin() to restore
+historical behavior.
+
+Fixes: f150b4234397 ("xfs: split the iomap ops for buffered vs direct writes")
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Eric Sandeen <sandeen@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_iomap.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/xfs/xfs_iomap.c
++++ b/fs/xfs/xfs_iomap.c
+@@ -870,6 +870,9 @@ xfs_buffered_write_iomap_begin(
+       int                     allocfork = XFS_DATA_FORK;
+       int                     error = 0;
++      if (XFS_FORCED_SHUTDOWN(mp))
++              return -EIO;
++
+       /* we can't use delayed allocations when using extent size hints */
+       if (xfs_get_extsz_hint(ip))
+               return xfs_direct_write_iomap_begin(inode, offset, count,
diff --git a/queue-5.10/xfs-set-inode-size-after-creating-symlink.patch b/queue-5.10/xfs-set-inode-size-after-creating-symlink.patch
new file mode 100644 (file)
index 0000000..b59ed4b
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Mon Jun  6 07:00:47 PM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon,  6 Jun 2022 17:32:48 +0300
+Subject: xfs: set inode size after creating symlink
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, Dave Chinner <david@fromorbit.com>, "Darrick J . Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Brian Foster <bfoster@redhat.com>, Christian Brauner <brauner@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Adam Manzanares <a.manzanares@samsung.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org, Jeffrey Mitchell <jeffrey.mitchell@starlab.io>
+Message-ID: <20220606143255.685988-2-amir73il@gmail.com>
+
+From: Jeffrey Mitchell <jeffrey.mitchell@starlab.io>
+
+commit 8aa921a95335d0a8c8e2be35a44467e7c91ec3e4 upstream.
+
+When XFS creates a new symlink, it writes its size to disk but not to the
+VFS inode. This causes i_size_read() to return 0 for that symlink until
+it is re-read from disk, for example when the system is rebooted.
+
+I found this inconsistency while protecting directories with eCryptFS.
+The command "stat path/to/symlink/in/ecryptfs" will report "Size: 0" if
+the symlink was created after the last reboot on an XFS root.
+
+Call i_size_write() in xfs_symlink().
+
+Signed-off-by: Jeffrey Mitchell <jeffrey.mitchell@starlab.io>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Brian Foster <bfoster@redhat.com>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_symlink.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/xfs/xfs_symlink.c
++++ b/fs/xfs/xfs_symlink.c
+@@ -300,6 +300,7 @@ xfs_symlink(
+               }
+               ASSERT(pathlen == 0);
+       }
++      i_size_write(VFS_I(ip), ip->i_d.di_size);
+       /*
+        * Create the directory entry for the symlink.
diff --git a/queue-5.10/xfs-sync-lazy-sb-accounting-on-quiesce-of-read-only-mounts.patch b/queue-5.10/xfs-sync-lazy-sb-accounting-on-quiesce-of-read-only-mounts.patch
new file mode 100644 (file)
index 0000000..5ccffd2
--- /dev/null
@@ -0,0 +1,124 @@
+From foo@baz Mon Jun  6 07:00:47 PM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Mon,  6 Jun 2022 17:32:49 +0300
+Subject: xfs: sync lazy sb accounting on quiesce of read-only mounts
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, Dave Chinner <david@fromorbit.com>, "Darrick J . Wong" <djwong@kernel.org>, Christoph Hellwig <hch@lst.de>, Brian Foster <bfoster@redhat.com>, Christian Brauner <brauner@kernel.org>, Luis Chamberlain <mcgrof@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Adam Manzanares <a.manzanares@samsung.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org, Gao Xiang <hsiangkao@redhat.com>, Allison Henderson <allison.henderson@oracle.com>, "Darrick J . Wong" <darrick.wong@oracle.com>, Bill O'Donnell <billodo@redhat.com>
+Message-ID: <20220606143255.685988-3-amir73il@gmail.com>
+
+From: Brian Foster <bfoster@redhat.com>
+
+commit 50d25484bebe94320c49dd1347d3330c7063bbdb upstream.
+
+xfs_log_sbcount() syncs the superblock specifically to accumulate
+the in-core percpu superblock counters and commit them to disk. This
+is required to maintain filesystem consistency across quiesce
+(freeze, read-only mount/remount) or unmount when lazy superblock
+accounting is enabled because individual transactions do not update
+the superblock directly.
+
+This mechanism works as expected for writable mounts, but
+xfs_log_sbcount() skips the update for read-only mounts. Read-only
+mounts otherwise still allow log recovery and write out an unmount
+record during log quiesce. If a read-only mount performs log
+recovery, it can modify the in-core superblock counters and write an
+unmount record when the filesystem unmounts without ever syncing the
+in-core counters. This leaves the filesystem with a clean log but in
+an inconsistent state with regard to lazy sb counters.
+
+Update xfs_log_sbcount() to use the same logic
+xfs_log_unmount_write() uses to determine when to write an unmount
+record. This ensures that lazy accounting is always synced before
+the log is cleaned. Refactor this logic into a new helper to
+distinguish between a writable filesystem and a writable log.
+Specifically, the log is writable unless the filesystem is mounted
+with the norecovery mount option, the underlying log device is
+read-only, or the filesystem is shut down. Drop the freeze state
+check because the update is already allowed during the freezing
+process and no context calls this function on an already frozen fs.
+Also, retain the shutdown check in xfs_log_unmount_write() to catch
+the case where the preceding log force might have triggered a
+shutdown.
+
+Signed-off-by: Brian Foster <bfoster@redhat.com>
+Reviewed-by: Gao Xiang <hsiangkao@redhat.com>
+Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Reviewed-by: Bill O'Donnell <billodo@redhat.com>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_log.c   |   28 ++++++++++++++++++++--------
+ fs/xfs/xfs_log.h   |    1 +
+ fs/xfs/xfs_mount.c |    3 +--
+ 3 files changed, 22 insertions(+), 10 deletions(-)
+
+--- a/fs/xfs/xfs_log.c
++++ b/fs/xfs/xfs_log.c
+@@ -347,6 +347,25 @@ xlog_tic_add_region(xlog_ticket_t *tic,
+       tic->t_res_num++;
+ }
++bool
++xfs_log_writable(
++      struct xfs_mount        *mp)
++{
++      /*
++       * Never write to the log on norecovery mounts, if the block device is
++       * read-only, or if the filesystem is shutdown. Read-only mounts still
++       * allow internal writes for log recovery and unmount purposes, so don't
++       * restrict that case here.
++       */
++      if (mp->m_flags & XFS_MOUNT_NORECOVERY)
++              return false;
++      if (xfs_readonly_buftarg(mp->m_log->l_targ))
++              return false;
++      if (XFS_FORCED_SHUTDOWN(mp))
++              return false;
++      return true;
++}
++
+ /*
+  * Replenish the byte reservation required by moving the grant write head.
+  */
+@@ -886,15 +905,8 @@ xfs_log_unmount_write(
+ {
+       struct xlog             *log = mp->m_log;
+-      /*
+-       * Don't write out unmount record on norecovery mounts or ro devices.
+-       * Or, if we are doing a forced umount (typically because of IO errors).
+-       */
+-      if (mp->m_flags & XFS_MOUNT_NORECOVERY ||
+-          xfs_readonly_buftarg(log->l_targ)) {
+-              ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
++      if (!xfs_log_writable(mp))
+               return;
+-      }
+       xfs_log_force(mp, XFS_LOG_SYNC);
+--- a/fs/xfs/xfs_log.h
++++ b/fs/xfs/xfs_log.h
+@@ -127,6 +127,7 @@ int          xfs_log_reserve(struct xfs_mount *
+ int     xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
+ void      xfs_log_unmount(struct xfs_mount *mp);
+ int     xfs_log_force_umount(struct xfs_mount *mp, int logerror);
++bool  xfs_log_writable(struct xfs_mount *mp);
+ struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
+ void    xfs_log_ticket_put(struct xlog_ticket *ticket);
+--- a/fs/xfs/xfs_mount.c
++++ b/fs/xfs/xfs_mount.c
+@@ -1176,8 +1176,7 @@ xfs_fs_writable(
+ int
+ xfs_log_sbcount(xfs_mount_t *mp)
+ {
+-      /* allow this to proceed during the freeze sequence... */
+-      if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
++      if (!xfs_log_writable(mp))
+               return 0;
+       /*