--- /dev/null
+From 7d7672bc5d1038c745716c397d892d21e29de71c Mon Sep 17 00:00:00 2001
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Sat, 9 Jul 2022 08:18:41 +0900
+Subject: btrfs: convert count_max_extents() to use fs_info->max_extent_size
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit 7d7672bc5d1038c745716c397d892d21e29de71c upstream.
+
+If count_max_extents() uses BTRFS_MAX_EXTENT_SIZE to calculate the number
+of extents needed, btrfs releases too much of the metadata reservation on
+its way to write out the data.
+
+Now that BTRFS_MAX_EXTENT_SIZE is replaced with fs_info->max_extent_size,
+convert count_max_extents() to use it instead, and fix the calculation of
+the metadata reservation.
+
+CC: stable@vger.kernel.org # 5.12+
+Fixes: d8e3fb106f39 ("btrfs: zoned: use ZONE_APPEND write for zoned mode")
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ctree.h | 21 +++++++++++++--------
+ fs/btrfs/delalloc-space.c | 6 +++---
+ fs/btrfs/inode.c | 16 ++++++++--------
+ 3 files changed, 24 insertions(+), 19 deletions(-)
+
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -107,14 +107,6 @@ struct btrfs_ioctl_encoded_io_args;
+ #define BTRFS_STAT_CURR 0
+ #define BTRFS_STAT_PREV 1
+
+-/*
+- * Count how many BTRFS_MAX_EXTENT_SIZE cover the @size
+- */
+-static inline u32 count_max_extents(u64 size)
+-{
+- return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
+-}
+-
+ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
+ {
+ BUG_ON(num_stripes == 0);
+@@ -3945,6 +3937,19 @@ static inline bool btrfs_is_zoned(const
+ return fs_info->zoned != 0;
+ }
+
++/*
++ * Count how many fs_info->max_extent_size cover the @size
++ */
++static inline u32 count_max_extents(struct btrfs_fs_info *fs_info, u64 size)
++{
++#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
++ if (!fs_info)
++ return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
++#endif
++
++ return div_u64(size + fs_info->max_extent_size - 1, fs_info->max_extent_size);
++}
++
+ static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root)
+ {
+ return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID;
+--- a/fs/btrfs/delalloc-space.c
++++ b/fs/btrfs/delalloc-space.c
+@@ -273,7 +273,7 @@ static void calc_inode_reservations(stru
+ u64 num_bytes, u64 disk_num_bytes,
+ u64 *meta_reserve, u64 *qgroup_reserve)
+ {
+- u64 nr_extents = count_max_extents(num_bytes);
++ u64 nr_extents = count_max_extents(fs_info, num_bytes);
+ u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes);
+ u64 inode_update = btrfs_calc_metadata_size(fs_info, 1);
+
+@@ -349,7 +349,7 @@ int btrfs_delalloc_reserve_metadata(stru
+ * needs to free the reservation we just made.
+ */
+ spin_lock(&inode->lock);
+- nr_extents = count_max_extents(num_bytes);
++ nr_extents = count_max_extents(fs_info, num_bytes);
+ btrfs_mod_outstanding_extents(inode, nr_extents);
+ inode->csum_bytes += disk_num_bytes;
+ btrfs_calculate_inode_block_rsv_size(fs_info, inode);
+@@ -412,7 +412,7 @@ void btrfs_delalloc_release_extents(stru
+ unsigned num_extents;
+
+ spin_lock(&inode->lock);
+- num_extents = count_max_extents(num_bytes);
++ num_extents = count_max_extents(fs_info, num_bytes);
+ btrfs_mod_outstanding_extents(inode, -num_extents);
+ btrfs_calculate_inode_block_rsv_size(fs_info, inode);
+ spin_unlock(&inode->lock);
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -2159,10 +2159,10 @@ void btrfs_split_delalloc_extent(struct
+ * applies here, just in reverse.
+ */
+ new_size = orig->end - split + 1;
+- num_extents = count_max_extents(new_size);
++ num_extents = count_max_extents(fs_info, new_size);
+ new_size = split - orig->start;
+- num_extents += count_max_extents(new_size);
+- if (count_max_extents(size) >= num_extents)
++ num_extents += count_max_extents(fs_info, new_size);
++ if (count_max_extents(fs_info, size) >= num_extents)
+ return;
+ }
+
+@@ -2219,10 +2219,10 @@ void btrfs_merge_delalloc_extent(struct
+ * this case.
+ */
+ old_size = other->end - other->start + 1;
+- num_extents = count_max_extents(old_size);
++ num_extents = count_max_extents(fs_info, old_size);
+ old_size = new->end - new->start + 1;
+- num_extents += count_max_extents(old_size);
+- if (count_max_extents(new_size) >= num_extents)
++ num_extents += count_max_extents(fs_info, old_size);
++ if (count_max_extents(fs_info, new_size) >= num_extents)
+ return;
+
+ spin_lock(&BTRFS_I(inode)->lock);
+@@ -2301,7 +2301,7 @@ void btrfs_set_delalloc_extent(struct in
+ if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ u64 len = state->end + 1 - state->start;
+- u32 num_extents = count_max_extents(len);
++ u32 num_extents = count_max_extents(fs_info, len);
+ bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode));
+
+ spin_lock(&BTRFS_I(inode)->lock);
+@@ -2343,7 +2343,7 @@ void btrfs_clear_delalloc_extent(struct
+ struct btrfs_inode *inode = BTRFS_I(vfs_inode);
+ struct btrfs_fs_info *fs_info = btrfs_sb(vfs_inode->i_sb);
+ u64 len = state->end + 1 - state->start;
+- u32 num_extents = count_max_extents(len);
++ u32 num_extents = count_max_extents(fs_info, len);
+
+ if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) {
+ spin_lock(&inode->lock);
--- /dev/null
+From 723df2bcc9e166ac7fb82b3932a53e09415dfcde Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Sun, 17 Jul 2022 22:05:05 +0100
+Subject: btrfs: join running log transaction when logging new name
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 723df2bcc9e166ac7fb82b3932a53e09415dfcde upstream.
+
+When logging a new name, in case of a rename, we pin the log before
+changing it. We then either delete a directory entry from the log or
+insert a key range item to mark the old name for deletion on log replay.
+
+However when doing one of those log changes we may have another task that
+started writing out the log (at btrfs_sync_log()) and it started before
+we pinned the log root. So we may end up changing a log tree while its
+writeback is being started by another task syncing the log. This can lead
+to inconsistencies in a log tree and other unexpected results during log
+replay, because we can get some committed node pointing to a node/leaf
+that ends up not getting written to disk before the next log commit.
+
+The problem, conceptually, started to happen in commit 88d2beec7e53fc
+("btrfs: avoid logging all directory changes during renames"), because
+there we started to update the log without joining its current transaction
+first.
+
+However the problem only became visible with commit 259c4b96d78dda
+("btrfs: stop doing unnecessary log updates during a rename"), and that is
+because we used to pin the log at btrfs_rename() and then before entering
+btrfs_log_new_name(), when unlinking the old dentry, we ended up at
+btrfs_del_inode_ref_in_log() and btrfs_del_dir_entries_in_log(). Both
+of them join the current log transaction, effectively waiting for any log
+transaction writeout (due to acquiring the root's log_mutex). This made it
+safe even after leaving the current log transaction, because we remained
+with the log pinned when we called btrfs_log_new_name().
+
+Then in commit 259c4b96d78dda ("btrfs: stop doing unnecessary log updates
+during a rename"), we removed the log pinning from btrfs_rename() and
+stopped calling btrfs_del_inode_ref_in_log() and
+btrfs_del_dir_entries_in_log() during the rename, and started to do all
+the needed work at btrfs_log_new_name(), but without joining the current
+log transaction, only pinning the log, which is racy because another task
+may have started writeout of the log tree right before we pinned the log.
+
+Both commits landed in kernel 5.18, so it doesn't make any practical
+difference which should be blamed, but I'm blaming the second commit only
+because with the first one, by chance, the problem did not happen due to
+the fact we joined the log transaction after pinning the log and unpinned
+it only after calling btrfs_log_new_name().
+
+So make btrfs_log_new_name() join the current log transaction instead of
+pinning it, so that we never do log updates if its writeout is starting.
+
+Fixes: 259c4b96d78dda ("btrfs: stop doing unnecessary log updates during a rename")
+CC: stable@vger.kernel.org # 5.18+
+Reported-by: Zygo Blaxell <ce3g8jdj@umail.furryterror.org>
+Tested-by: Zygo Blaxell <ce3g8jdj@umail.furryterror.org>
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/tree-log.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -7030,8 +7030,15 @@ void btrfs_log_new_name(struct btrfs_tra
+ * anyone from syncing the log until we have updated both inodes
+ * in the log.
+ */
++ ret = join_running_log_trans(root);
++ /*
++ * At least one of the inodes was logged before, so this should
++ * not fail, but if it does, it's not serious, just bail out and
++ * mark the log for a full commit.
++ */
++ if (WARN_ON_ONCE(ret < 0))
++ goto out;
+ log_pinned = true;
+- btrfs_pin_log_trans(root);
+
+ path = btrfs_alloc_path();
+ if (!path) {