git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.18-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 15 Aug 2022 11:17:02 +0000 (13:17 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 15 Aug 2022 11:17:02 +0000 (13:17 +0200)
added patches:
btrfs-convert-count_max_extents-to-use-fs_info-max_extent_size.patch
btrfs-join-running-log-transaction-when-logging-new-name.patch

queue-5.18/btrfs-convert-count_max_extents-to-use-fs_info-max_extent_size.patch [new file with mode: 0644]
queue-5.18/btrfs-join-running-log-transaction-when-logging-new-name.patch [new file with mode: 0644]
queue-5.18/series

diff --git a/queue-5.18/btrfs-convert-count_max_extents-to-use-fs_info-max_extent_size.patch b/queue-5.18/btrfs-convert-count_max_extents-to-use-fs_info-max_extent_size.patch
new file mode 100644 (file)
index 0000000..92a9656
--- /dev/null
@@ -0,0 +1,142 @@
+From 7d7672bc5d1038c745716c397d892d21e29de71c Mon Sep 17 00:00:00 2001
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Sat, 9 Jul 2022 08:18:41 +0900
+Subject: btrfs: convert count_max_extents() to use fs_info->max_extent_size
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit 7d7672bc5d1038c745716c397d892d21e29de71c upstream.
+
+If count_max_extents() uses BTRFS_MAX_EXTENT_SIZE to calculate the number
+of extents needed, btrfs releases too much of the metadata reservation on
+its way to write out the data.
+
+Now that BTRFS_MAX_EXTENT_SIZE is replaced with fs_info->max_extent_size,
+convert count_max_extents() to use it instead, and fix the calculation of
+the metadata reservation.
+
+CC: stable@vger.kernel.org # 5.12+
+Fixes: d8e3fb106f39 ("btrfs: zoned: use ZONE_APPEND write for zoned mode")
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ctree.h          |   21 +++++++++++++--------
+ fs/btrfs/delalloc-space.c |    6 +++---
+ fs/btrfs/inode.c          |   16 ++++++++--------
+ 3 files changed, 24 insertions(+), 19 deletions(-)
+
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -107,14 +107,6 @@ struct btrfs_ioctl_encoded_io_args;
+ #define BTRFS_STAT_CURR               0
+ #define BTRFS_STAT_PREV               1
+-/*
+- * Count how many BTRFS_MAX_EXTENT_SIZE cover the @size
+- */
+-static inline u32 count_max_extents(u64 size)
+-{
+-      return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
+-}
+-
+ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
+ {
+       BUG_ON(num_stripes == 0);
+@@ -3945,6 +3937,19 @@ static inline bool btrfs_is_zoned(const
+       return fs_info->zoned != 0;
+ }
++/*
++ * Count how many fs_info->max_extent_size cover the @size
++ */
++static inline u32 count_max_extents(struct btrfs_fs_info *fs_info, u64 size)
++{
++#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
++      if (!fs_info)
++              return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
++#endif
++
++      return div_u64(size + fs_info->max_extent_size - 1, fs_info->max_extent_size);
++}
++
+ static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root)
+ {
+       return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID;
+--- a/fs/btrfs/delalloc-space.c
++++ b/fs/btrfs/delalloc-space.c
+@@ -273,7 +273,7 @@ static void calc_inode_reservations(stru
+                                   u64 num_bytes, u64 disk_num_bytes,
+                                   u64 *meta_reserve, u64 *qgroup_reserve)
+ {
+-      u64 nr_extents = count_max_extents(num_bytes);
++      u64 nr_extents = count_max_extents(fs_info, num_bytes);
+       u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes);
+       u64 inode_update = btrfs_calc_metadata_size(fs_info, 1);
+@@ -349,7 +349,7 @@ int btrfs_delalloc_reserve_metadata(stru
+        * needs to free the reservation we just made.
+        */
+       spin_lock(&inode->lock);
+-      nr_extents = count_max_extents(num_bytes);
++      nr_extents = count_max_extents(fs_info, num_bytes);
+       btrfs_mod_outstanding_extents(inode, nr_extents);
+       inode->csum_bytes += disk_num_bytes;
+       btrfs_calculate_inode_block_rsv_size(fs_info, inode);
+@@ -412,7 +412,7 @@ void btrfs_delalloc_release_extents(stru
+       unsigned num_extents;
+       spin_lock(&inode->lock);
+-      num_extents = count_max_extents(num_bytes);
++      num_extents = count_max_extents(fs_info, num_bytes);
+       btrfs_mod_outstanding_extents(inode, -num_extents);
+       btrfs_calculate_inode_block_rsv_size(fs_info, inode);
+       spin_unlock(&inode->lock);
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -2159,10 +2159,10 @@ void btrfs_split_delalloc_extent(struct
+                * applies here, just in reverse.
+                */
+               new_size = orig->end - split + 1;
+-              num_extents = count_max_extents(new_size);
++              num_extents = count_max_extents(fs_info, new_size);
+               new_size = split - orig->start;
+-              num_extents += count_max_extents(new_size);
+-              if (count_max_extents(size) >= num_extents)
++              num_extents += count_max_extents(fs_info, new_size);
++              if (count_max_extents(fs_info, size) >= num_extents)
+                       return;
+       }
+@@ -2219,10 +2219,10 @@ void btrfs_merge_delalloc_extent(struct
+        * this case.
+        */
+       old_size = other->end - other->start + 1;
+-      num_extents = count_max_extents(old_size);
++      num_extents = count_max_extents(fs_info, old_size);
+       old_size = new->end - new->start + 1;
+-      num_extents += count_max_extents(old_size);
+-      if (count_max_extents(new_size) >= num_extents)
++      num_extents += count_max_extents(fs_info, old_size);
++      if (count_max_extents(fs_info, new_size) >= num_extents)
+               return;
+       spin_lock(&BTRFS_I(inode)->lock);
+@@ -2301,7 +2301,7 @@ void btrfs_set_delalloc_extent(struct in
+       if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
+               struct btrfs_root *root = BTRFS_I(inode)->root;
+               u64 len = state->end + 1 - state->start;
+-              u32 num_extents = count_max_extents(len);
++              u32 num_extents = count_max_extents(fs_info, len);
+               bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode));
+               spin_lock(&BTRFS_I(inode)->lock);
+@@ -2343,7 +2343,7 @@ void btrfs_clear_delalloc_extent(struct
+       struct btrfs_inode *inode = BTRFS_I(vfs_inode);
+       struct btrfs_fs_info *fs_info = btrfs_sb(vfs_inode->i_sb);
+       u64 len = state->end + 1 - state->start;
+-      u32 num_extents = count_max_extents(len);
++      u32 num_extents = count_max_extents(fs_info, len);
+       if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) {
+               spin_lock(&inode->lock);
diff --git a/queue-5.18/btrfs-join-running-log-transaction-when-logging-new-name.patch b/queue-5.18/btrfs-join-running-log-transaction-when-logging-new-name.patch
new file mode 100644 (file)
index 0000000..97865d2
--- /dev/null
@@ -0,0 +1,84 @@
+From 723df2bcc9e166ac7fb82b3932a53e09415dfcde Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Sun, 17 Jul 2022 22:05:05 +0100
+Subject: btrfs: join running log transaction when logging new name
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 723df2bcc9e166ac7fb82b3932a53e09415dfcde upstream.
+
+When logging a new name, in case of a rename, we pin the log before
+changing it. We then either delete a directory entry from the log or
+insert a key range item to mark the old name for deletion on log replay.
+
+However when doing one of those log changes we may have another task that
+started writing out the log (at btrfs_sync_log()) and it started before
+we pinned the log root. So we may end up changing a log tree while its
+writeback is being started by another task syncing the log. This can lead
+to inconsistencies in a log tree and other unexpected results during log
+replay, because we can get some committed node pointing to a node/leaf
+that ends up not getting written to disk before the next log commit.
+
+The problem, conceptually, started to happen in commit 88d2beec7e53fc
+("btrfs: avoid logging all directory changes during renames"), because
+there we started to update the log without joining its current transaction
+first.
+
+However the problem only became visible with commit 259c4b96d78dda
+("btrfs: stop doing unnecessary log updates during a rename"), and that is
+because we used to pin the log at btrfs_rename() and then before entering
+btrfs_log_new_name(), when unlinking the old dentry, we ended up at
+btrfs_del_inode_ref_in_log() and btrfs_del_dir_entries_in_log(). Both
+of them join the current log transaction, effectively waiting for any log
+transaction writeout (due to acquiring the root's log_mutex). This made it
+safe even after leaving the current log transaction, because we remained
+with the log pinned when we called btrfs_log_new_name().
+
+Then in commit 259c4b96d78dda ("btrfs: stop doing unnecessary log updates
+during a rename"), we removed the log pinning from btrfs_rename() and
+stopped calling btrfs_del_inode_ref_in_log() and
+btrfs_del_dir_entries_in_log() during the rename, and started to do all
+the needed work at btrfs_log_new_name(), but without joining the current
+log transaction, only pinning the log, which is racy because another task
+may have started writeout of the log tree right before we pinned the log.
+
+Both commits landed in kernel 5.18, so it doesn't make any practical
+difference which should be blamed, but I'm blaming the second commit only
+because with the first one, by chance, the problem did not happen due to
+the fact we joined the log transaction after pinning the log and unpinned
+it only after calling btrfs_log_new_name().
+
+So make btrfs_log_new_name() join the current log transaction instead of
+pinning it, so that we never do log updates if its writeout is starting.
+
+Fixes: 259c4b96d78dda ("btrfs: stop doing unnecessary log updates during a rename")
+CC: stable@vger.kernel.org # 5.18+
+Reported-by: Zygo Blaxell <ce3g8jdj@umail.furryterror.org>
+Tested-by: Zygo Blaxell <ce3g8jdj@umail.furryterror.org>
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-log.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -7030,8 +7030,15 @@ void btrfs_log_new_name(struct btrfs_tra
+                * anyone from syncing the log until we have updated both inodes
+                * in the log.
+                */
++              ret = join_running_log_trans(root);
++              /*
++               * At least one of the inodes was logged before, so this should
++               * not fail, but if it does, it's not serious, just bail out and
++               * mark the log for a full commit.
++               */
++              if (WARN_ON_ONCE(ret < 0))
++                      goto out;
+               log_pinned = true;
+-              btrfs_pin_log_trans(root);
+               path = btrfs_alloc_path();
+               if (!path) {
index 649d100a9d33a3644bc44ed8277b9055a3f4a2a8..5041779dad644ee169cb5e2566fdec81fd294b93 100644 (file)
@@ -1080,3 +1080,5 @@ xen-blkback-fix-persistent-grants-negotiation.patch
 xen-blkback-apply-feature_persistent-parameter-when-connect.patch
 xen-blkfront-apply-feature_persistent-parameter-when-connect.patch
 powerpc-fix-eh-field-when-calling-lwarx-on-ppc32.patch
+btrfs-join-running-log-transaction-when-logging-new-name.patch
+btrfs-convert-count_max_extents-to-use-fs_info-max_extent_size.patch