--- /dev/null
+From foo@baz Thu Aug 25 02:07:49 PM CEST 2022
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Mon, 22 Aug 2022 15:07:00 +0900
+Subject: block: add a bdev_max_zone_append_sectors helper
+To: linux-btrfs@vger.kernel.org, stable@vger.kernel.org
+Cc: Christoph Hellwig <hch@lst.de>, Damien Le Moal <damien.lemoal@opensource.wdc.com>, "Martin K . Petersen" <martin.petersen@oracle.com>, Johannes Thumshirn <johannes.thumshirn@wdc.com>, Chaitanya Kulkarni <kch@nvidia.com>, Jens Axboe <axboe@kernel.dk>
+Message-ID: <20220822060704.1278361-2-naohiro.aota@wdc.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 2aba0d19f4d8c8929b4b3b94a9cfde2aa20e6ee2 upstream
+
+Add a helper to check the max supported sectors for zone append based on
+the block_device instead of having to poke into the block layer internal
+request_queue.
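+
+As an editorial illustration only (not part of the upstream commit;
+'bdev' stands for any valid struct block_device pointer), callers can
+now write:
+
+	max_sectors = bdev_max_zone_append_sectors(bdev);
+
+instead of reaching into the queue themselves:
+
+	max_sectors = queue_max_zone_append_sectors(bdev_get_queue(bdev));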
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
+Link: https://lore.kernel.org/r/20220415045258.199825-16-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/nvme/target/zns.c | 3 +--
+ fs/zonefs/super.c | 3 +--
+ include/linux/blkdev.h | 6 ++++++
+ 3 files changed, 8 insertions(+), 4 deletions(-)
+
+--- a/drivers/nvme/target/zns.c
++++ b/drivers/nvme/target/zns.c
+@@ -34,8 +34,7 @@ static int validate_conv_zones_cb(struct
+
+ bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
+ {
+- struct request_queue *q = ns->bdev->bd_disk->queue;
+- u8 zasl = nvmet_zasl(queue_max_zone_append_sectors(q));
++ u8 zasl = nvmet_zasl(bdev_max_zone_append_sectors(ns->bdev));
+ struct gendisk *bd_disk = ns->bdev->bd_disk;
+ int ret;
+
+--- a/fs/zonefs/super.c
++++ b/fs/zonefs/super.c
+@@ -723,13 +723,12 @@ static ssize_t zonefs_file_dio_append(st
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ struct block_device *bdev = inode->i_sb->s_bdev;
+- unsigned int max;
++ unsigned int max = bdev_max_zone_append_sectors(bdev);
+ struct bio *bio;
+ ssize_t size;
+ int nr_pages;
+ ssize_t ret;
+
+- max = queue_max_zone_append_sectors(bdev_get_queue(bdev));
+ max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize);
+ iov_iter_truncate(from, max);
+
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -1387,6 +1387,12 @@ static inline unsigned int queue_max_zon
+ return min(l->max_zone_append_sectors, l->max_sectors);
+ }
+
++static inline unsigned int
++bdev_max_zone_append_sectors(struct block_device *bdev)
++{
++ return queue_max_zone_append_sectors(bdev_get_queue(bdev));
++}
++
+ static inline unsigned queue_logical_block_size(const struct request_queue *q)
+ {
+ int retval = 512;
--- /dev/null
+From foo@baz Thu Aug 25 02:07:49 PM CEST 2022
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Mon, 22 Aug 2022 15:07:01 +0900
+Subject: block: add bdev_max_segments() helper
+To: linux-btrfs@vger.kernel.org, stable@vger.kernel.org
+Cc: Naohiro Aota <naohiro.aota@wdc.com>, Johannes Thumshirn <johannes.thumshirn@wdc.com>, Jens Axboe <axboe@kernel.dk>, Christoph Hellwig <hch@lst.de>, David Sterba <dsterba@suse.com>
+Message-ID: <20220822060704.1278361-3-naohiro.aota@wdc.com>
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit 65ea1b66482f415d51cd46515b02477257330339 upstream
+
+Add bdev_max_segments() like other queue parameters.
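+
+A minimal usage sketch (editorial illustration, not from the upstream
+commit; 'bdev' is any valid struct block_device pointer):
+
+	/* equivalent to queue_max_segments(bdev_get_queue(bdev)) */
+	unsigned int max_segs = bdev_max_segments(bdev);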
+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Reviewed-by: Jens Axboe <axboe@kernel.dk>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/blkdev.h | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -1393,6 +1393,11 @@ bdev_max_zone_append_sectors(struct bloc
+ return queue_max_zone_append_sectors(bdev_get_queue(bdev));
+ }
+
++static inline unsigned int bdev_max_segments(struct block_device *bdev)
++{
++ return queue_max_segments(bdev_get_queue(bdev));
++}
++
+ static inline unsigned queue_logical_block_size(const struct request_queue *q)
+ {
+ int retval = 512;
--- /dev/null
+From foo@baz Thu Aug 25 02:07:49 PM CEST 2022
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Mon, 22 Aug 2022 15:07:04 +0900
+Subject: btrfs: convert count_max_extents() to use fs_info->max_extent_size
+To: linux-btrfs@vger.kernel.org, stable@vger.kernel.org
+Cc: Naohiro Aota <naohiro.aota@wdc.com>, David Sterba <dsterba@suse.com>
+Message-ID: <20220822060704.1278361-6-naohiro.aota@wdc.com>
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit 7d7672bc5d1038c745716c397d892d21e29de71c upstream
+
+If count_max_extents() uses BTRFS_MAX_EXTENT_SIZE to calculate the number
+of extents needed, btrfs releases too much of the metadata reservation on
+its way to write out the data.
+
+Now that BTRFS_MAX_EXTENT_SIZE is replaced with fs_info->max_extent_size,
+convert count_max_extents() to use it instead, and fix the calculation of
+the metadata reservation.
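+
+As an editorial illustration with assumed numbers (not from the upstream
+commit): count_max_extents() is a ceiling division, so for a 512M
+delalloc range it now returns DIV_ROUND_UP(512M, max_extent_size), i.e.
+4 extents on a regular filesystem (max_extent_size = 128M) but 512
+extents on a zoned filesystem whose max_extent_size ends up at 1M, and
+the metadata reservation follows the larger count.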
+
+CC: stable@vger.kernel.org # 5.12+
+Fixes: d8e3fb106f39 ("btrfs: zoned: use ZONE_APPEND write for zoned mode")
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ctree.h | 21 +++++++++++++--------
+ fs/btrfs/delalloc-space.c | 6 +++---
+ fs/btrfs/inode.c | 16 ++++++++--------
+ 3 files changed, 24 insertions(+), 19 deletions(-)
+
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -105,14 +105,6 @@ struct btrfs_ref;
+ #define BTRFS_STAT_CURR 0
+ #define BTRFS_STAT_PREV 1
+
+-/*
+- * Count how many BTRFS_MAX_EXTENT_SIZE cover the @size
+- */
+-static inline u32 count_max_extents(u64 size)
+-{
+- return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
+-}
+-
+ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
+ {
+ BUG_ON(num_stripes == 0);
+@@ -3878,6 +3870,19 @@ static inline bool btrfs_is_zoned(const
+ return fs_info->zoned != 0;
+ }
+
++/*
++ * Count how many fs_info->max_extent_size cover the @size
++ */
++static inline u32 count_max_extents(struct btrfs_fs_info *fs_info, u64 size)
++{
++#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
++ if (!fs_info)
++ return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
++#endif
++
++ return div_u64(size + fs_info->max_extent_size - 1, fs_info->max_extent_size);
++}
++
+ static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root)
+ {
+ return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID;
+--- a/fs/btrfs/delalloc-space.c
++++ b/fs/btrfs/delalloc-space.c
+@@ -273,7 +273,7 @@ static void calc_inode_reservations(stru
+ u64 num_bytes, u64 *meta_reserve,
+ u64 *qgroup_reserve)
+ {
+- u64 nr_extents = count_max_extents(num_bytes);
++ u64 nr_extents = count_max_extents(fs_info, num_bytes);
+ u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes);
+ u64 inode_update = btrfs_calc_metadata_size(fs_info, 1);
+
+@@ -347,7 +347,7 @@ int btrfs_delalloc_reserve_metadata(stru
+ * needs to free the reservation we just made.
+ */
+ spin_lock(&inode->lock);
+- nr_extents = count_max_extents(num_bytes);
++ nr_extents = count_max_extents(fs_info, num_bytes);
+ btrfs_mod_outstanding_extents(inode, nr_extents);
+ inode->csum_bytes += num_bytes;
+ btrfs_calculate_inode_block_rsv_size(fs_info, inode);
+@@ -410,7 +410,7 @@ void btrfs_delalloc_release_extents(stru
+ unsigned num_extents;
+
+ spin_lock(&inode->lock);
+- num_extents = count_max_extents(num_bytes);
++ num_extents = count_max_extents(fs_info, num_bytes);
+ btrfs_mod_outstanding_extents(inode, -num_extents);
+ btrfs_calculate_inode_block_rsv_size(fs_info, inode);
+ spin_unlock(&inode->lock);
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -2049,10 +2049,10 @@ void btrfs_split_delalloc_extent(struct
+ * applies here, just in reverse.
+ */
+ new_size = orig->end - split + 1;
+- num_extents = count_max_extents(new_size);
++ num_extents = count_max_extents(fs_info, new_size);
+ new_size = split - orig->start;
+- num_extents += count_max_extents(new_size);
+- if (count_max_extents(size) >= num_extents)
++ num_extents += count_max_extents(fs_info, new_size);
++ if (count_max_extents(fs_info, size) >= num_extents)
+ return;
+ }
+
+@@ -2109,10 +2109,10 @@ void btrfs_merge_delalloc_extent(struct
+ * this case.
+ */
+ old_size = other->end - other->start + 1;
+- num_extents = count_max_extents(old_size);
++ num_extents = count_max_extents(fs_info, old_size);
+ old_size = new->end - new->start + 1;
+- num_extents += count_max_extents(old_size);
+- if (count_max_extents(new_size) >= num_extents)
++ num_extents += count_max_extents(fs_info, old_size);
++ if (count_max_extents(fs_info, new_size) >= num_extents)
+ return;
+
+ spin_lock(&BTRFS_I(inode)->lock);
+@@ -2191,7 +2191,7 @@ void btrfs_set_delalloc_extent(struct in
+ if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ u64 len = state->end + 1 - state->start;
+- u32 num_extents = count_max_extents(len);
++ u32 num_extents = count_max_extents(fs_info, len);
+ bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode));
+
+ spin_lock(&BTRFS_I(inode)->lock);
+@@ -2233,7 +2233,7 @@ void btrfs_clear_delalloc_extent(struct
+ struct btrfs_inode *inode = BTRFS_I(vfs_inode);
+ struct btrfs_fs_info *fs_info = btrfs_sb(vfs_inode->i_sb);
+ u64 len = state->end + 1 - state->start;
+- u32 num_extents = count_max_extents(len);
++ u32 num_extents = count_max_extents(fs_info, len);
+
+ if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) {
+ spin_lock(&inode->lock);
--- /dev/null
+From foo@baz Thu Aug 25 02:07:49 PM CEST 2022
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Mon, 22 Aug 2022 15:07:03 +0900
+Subject: btrfs: replace BTRFS_MAX_EXTENT_SIZE with fs_info->max_extent_size
+To: linux-btrfs@vger.kernel.org, stable@vger.kernel.org
+Cc: Naohiro Aota <naohiro.aota@wdc.com>, Johannes Thumshirn <johannes.thumshirn@wdc.com>, David Sterba <dsterba@suse.com>
+Message-ID: <20220822060704.1278361-5-naohiro.aota@wdc.com>
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit f7b12a62f008a3041f42f2426983e59a6a0a3c59 upstream
+
+On a zoned filesystem, data write-out is limited by max_zone_append_size,
+and a large ordered extent is split according to the size of a bio. OTOH,
+the number of extents to be written is calculated using
+BTRFS_MAX_EXTENT_SIZE, and that estimated number is used to reserve the
+metadata bytes needed to update and/or create the metadata items.
+
+The metadata reservation is done at, e.g., btrfs_buffered_write() and then
+released as the estimation changes. Thus, if the number of extents
+increases massively, the reserved metadata can run out.
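+
+As a rough editorial illustration with assumed numbers (not part of the
+upstream commit): with BTRFS_MAX_EXTENT_SIZE of 128M, a 128M buffered
+write reserves metadata for a single extent, but if max_zone_append_size
+is 1M the ordered extent is split into 128 pieces at write-out time,
+each becoming its own file extent item, far more than the reservation
+accounted for.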
+
+Such an increase in the number of extents easily occurs on a zoned
+filesystem if BTRFS_MAX_EXTENT_SIZE > max_zone_append_size, and it causes
+the following warning on a small-RAM environment with metadata
+over-commit disabled (done in the following patch).
+
+[75721.498492] ------------[ cut here ]------------
+[75721.505624] BTRFS: block rsv 1 returned -28
+[75721.512230] WARNING: CPU: 24 PID: 2327559 at fs/btrfs/block-rsv.c:537 btrfs_use_block_rsv+0x560/0x760 [btrfs]
+[75721.581854] CPU: 24 PID: 2327559 Comm: kworker/u64:10 Kdump: loaded Tainted: G W 5.18.0-rc2-BTRFS-ZNS+ #109
+[75721.597200] Hardware name: Supermicro Super Server/H12SSL-NT, BIOS 2.0 02/22/2021
+[75721.607310] Workqueue: btrfs-endio-write btrfs_work_helper [btrfs]
+[75721.616209] RIP: 0010:btrfs_use_block_rsv+0x560/0x760 [btrfs]
+[75721.646649] RSP: 0018:ffffc9000fbdf3e0 EFLAGS: 00010286
+[75721.654126] RAX: 0000000000000000 RBX: 0000000000004000 RCX: 0000000000000000
+[75721.663524] RDX: 0000000000000004 RSI: 0000000000000008 RDI: fffff52001f7be6e
+[75721.672921] RBP: ffffc9000fbdf420 R08: 0000000000000001 R09: ffff889f8d1fc6c7
+[75721.682493] R10: ffffed13f1a3f8d8 R11: 0000000000000001 R12: ffff88980a3c0e28
+[75721.692284] R13: ffff889b66590000 R14: ffff88980a3c0e40 R15: ffff88980a3c0e8a
+[75721.701878] FS: 0000000000000000(0000) GS:ffff889f8d000000(0000) knlGS:0000000000000000
+[75721.712601] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[75721.720726] CR2: 000055d12e05c018 CR3: 0000800193594000 CR4: 0000000000350ee0
+[75721.730499] Call Trace:
+[75721.735166] <TASK>
+[75721.739886] btrfs_alloc_tree_block+0x1e1/0x1100 [btrfs]
+[75721.747545] ? btrfs_alloc_logged_file_extent+0x550/0x550 [btrfs]
+[75721.756145] ? btrfs_get_32+0xea/0x2d0 [btrfs]
+[75721.762852] ? btrfs_get_32+0xea/0x2d0 [btrfs]
+[75721.769520] ? push_leaf_left+0x420/0x620 [btrfs]
+[75721.776431] ? memcpy+0x4e/0x60
+[75721.781931] split_leaf+0x433/0x12d0 [btrfs]
+[75721.788392] ? btrfs_get_token_32+0x580/0x580 [btrfs]
+[75721.795636] ? push_for_double_split.isra.0+0x420/0x420 [btrfs]
+[75721.803759] ? leaf_space_used+0x15d/0x1a0 [btrfs]
+[75721.811156] btrfs_search_slot+0x1bc3/0x2790 [btrfs]
+[75721.818300] ? lock_downgrade+0x7c0/0x7c0
+[75721.824411] ? free_extent_buffer.part.0+0x107/0x200 [btrfs]
+[75721.832456] ? split_leaf+0x12d0/0x12d0 [btrfs]
+[75721.839149] ? free_extent_buffer.part.0+0x14f/0x200 [btrfs]
+[75721.846945] ? free_extent_buffer+0x13/0x20 [btrfs]
+[75721.853960] ? btrfs_release_path+0x4b/0x190 [btrfs]
+[75721.861429] btrfs_csum_file_blocks+0x85c/0x1500 [btrfs]
+[75721.869313] ? rcu_read_lock_sched_held+0x16/0x80
+[75721.876085] ? lock_release+0x552/0xf80
+[75721.881957] ? btrfs_del_csums+0x8c0/0x8c0 [btrfs]
+[75721.888886] ? __kasan_check_write+0x14/0x20
+[75721.895152] ? do_raw_read_unlock+0x44/0x80
+[75721.901323] ? _raw_write_lock_irq+0x60/0x80
+[75721.907983] ? btrfs_global_root+0xb9/0xe0 [btrfs]
+[75721.915166] ? btrfs_csum_root+0x12b/0x180 [btrfs]
+[75721.921918] ? btrfs_get_global_root+0x820/0x820 [btrfs]
+[75721.929166] ? _raw_write_unlock+0x23/0x40
+[75721.935116] ? unpin_extent_cache+0x1e3/0x390 [btrfs]
+[75721.942041] btrfs_finish_ordered_io.isra.0+0xa0c/0x1dc0 [btrfs]
+[75721.949906] ? try_to_wake_up+0x30/0x14a0
+[75721.955700] ? btrfs_unlink_subvol+0xda0/0xda0 [btrfs]
+[75721.962661] ? rcu_read_lock_sched_held+0x16/0x80
+[75721.969111] ? lock_acquire+0x41b/0x4c0
+[75721.974982] finish_ordered_fn+0x15/0x20 [btrfs]
+[75721.981639] btrfs_work_helper+0x1af/0xa80 [btrfs]
+[75721.988184] ? _raw_spin_unlock_irq+0x28/0x50
+[75721.994643] process_one_work+0x815/0x1460
+[75722.000444] ? pwq_dec_nr_in_flight+0x250/0x250
+[75722.006643] ? do_raw_spin_trylock+0xbb/0x190
+[75722.013086] worker_thread+0x59a/0xeb0
+[75722.018511] kthread+0x2ac/0x360
+[75722.023428] ? process_one_work+0x1460/0x1460
+[75722.029431] ? kthread_complete_and_exit+0x30/0x30
+[75722.036044] ret_from_fork+0x22/0x30
+[75722.041255] </TASK>
+[75722.045047] irq event stamp: 0
+[75722.049703] hardirqs last enabled at (0): [<0000000000000000>] 0x0
+[75722.057610] hardirqs last disabled at (0): [<ffffffff8118a94a>] copy_process+0x1c1a/0x66b0
+[75722.067533] softirqs last enabled at (0): [<ffffffff8118a989>] copy_process+0x1c59/0x66b0
+[75722.077423] softirqs last disabled at (0): [<0000000000000000>] 0x0
+[75722.085335] ---[ end trace 0000000000000000 ]---
+
+To fix the estimation, introduce fs_info->max_extent_size to replace
+BTRFS_MAX_EXTENT_SIZE, which allows setting a different size for regular
+vs zoned filesystems.
+
+Set fs_info->max_extent_size to BTRFS_MAX_EXTENT_SIZE by default. On a
+zoned filesystem, it is set to fs_info->max_zone_append_size.
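+
+The net effect, as an editorial sketch assembled from the hunks below
+(not a verbatim quote of the resulting code):
+
+	/* default, set in btrfs_init_fs_info() */
+	fs_info->max_extent_size = BTRFS_MAX_EXTENT_SIZE;
+
+	/* zoned, in btrfs_check_zoned_mode() */
+	fs_info->max_zone_append_size = ALIGN_DOWN(max_zone_append_size,
+						   fs_info->sectorsize);
+	if (fs_info->max_zone_append_size < fs_info->max_extent_size)
+		fs_info->max_extent_size = fs_info->max_zone_append_size;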
+
+CC: stable@vger.kernel.org # 5.12+
+Fixes: d8e3fb106f39 ("btrfs: zoned: use ZONE_APPEND write for zoned mode")
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ctree.h | 6 ++++++
+ fs/btrfs/disk-io.c | 2 ++
+ fs/btrfs/extent_io.c | 4 +++-
+ fs/btrfs/inode.c | 6 ++++--
+ fs/btrfs/zoned.c | 5 ++++-
+ 5 files changed, 19 insertions(+), 4 deletions(-)
+
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -999,6 +999,12 @@ struct btrfs_fs_info {
+ u32 csums_per_leaf;
+ u32 stripesize;
+
++ /*
++ * Maximum size of an extent. BTRFS_MAX_EXTENT_SIZE on regular
++ * filesystem, on zoned it depends on the device constraints.
++ */
++ u64 max_extent_size;
++
+ /* Block groups and devices containing active swapfiles. */
+ spinlock_t swapfile_pins_lock;
+ struct rb_root swapfile_pins;
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -3006,6 +3006,8 @@ void btrfs_init_fs_info(struct btrfs_fs_
+ fs_info->sectorsize_bits = ilog2(4096);
+ fs_info->stripesize = 4096;
+
++ fs_info->max_extent_size = BTRFS_MAX_EXTENT_SIZE;
++
+ spin_lock_init(&fs_info->swapfile_pins_lock);
+ fs_info->swapfile_pins = RB_ROOT;
+
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -1985,8 +1985,10 @@ noinline_for_stack bool find_lock_delall
+ struct page *locked_page, u64 *start,
+ u64 *end)
+ {
++ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
+- u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
++ /* The sanity tests may not set a valid fs_info. */
++ u64 max_bytes = fs_info ? fs_info->max_extent_size : BTRFS_MAX_EXTENT_SIZE;
+ u64 delalloc_start;
+ u64 delalloc_end;
+ bool found;
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -2032,6 +2032,7 @@ int btrfs_run_delalloc_range(struct btrf
+ void btrfs_split_delalloc_extent(struct inode *inode,
+ struct extent_state *orig, u64 split)
+ {
++ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ u64 size;
+
+ /* not delalloc, ignore it */
+@@ -2039,7 +2040,7 @@ void btrfs_split_delalloc_extent(struct
+ return;
+
+ size = orig->end - orig->start + 1;
+- if (size > BTRFS_MAX_EXTENT_SIZE) {
++ if (size > fs_info->max_extent_size) {
+ u32 num_extents;
+ u64 new_size;
+
+@@ -2068,6 +2069,7 @@ void btrfs_split_delalloc_extent(struct
+ void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
+ struct extent_state *other)
+ {
++ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ u64 new_size, old_size;
+ u32 num_extents;
+
+@@ -2081,7 +2083,7 @@ void btrfs_merge_delalloc_extent(struct
+ new_size = other->end - new->start + 1;
+
+ /* we're not bigger than the max, unreserve the space and go */
+- if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
++ if (new_size <= fs_info->max_extent_size) {
+ spin_lock(&BTRFS_I(inode)->lock);
+ btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
+ spin_unlock(&BTRFS_I(inode)->lock);
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -670,8 +670,11 @@ int btrfs_check_zoned_mode(struct btrfs_
+ }
+
+ fs_info->zone_size = zone_size;
+- fs_info->max_zone_append_size = max_zone_append_size;
++ fs_info->max_zone_append_size = ALIGN_DOWN(max_zone_append_size,
++ fs_info->sectorsize);
+ fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED;
++ if (fs_info->max_zone_append_size < fs_info->max_extent_size)
++ fs_info->max_extent_size = fs_info->max_zone_append_size;
+
+ /*
+ * Check mount options here, because we might change fs_info->zoned
--- /dev/null
+From foo@baz Thu Aug 25 02:07:49 PM CEST 2022
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Mon, 22 Aug 2022 15:07:02 +0900
+Subject: btrfs: zoned: revive max_zone_append_bytes
+To: linux-btrfs@vger.kernel.org, stable@vger.kernel.org
+Cc: Naohiro Aota <naohiro.aota@wdc.com>, Johannes Thumshirn <johannes.thumshirn@wdc.com>, David Sterba <dsterba@suse.com>
+Message-ID: <20220822060704.1278361-4-naohiro.aota@wdc.com>
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit c2ae7b772ef4e86c5ddf3fd47bf59045ae96a414 upstream
+
+This patch is basically a revert of commit 5a80d1c6a270 ("btrfs: zoned:
+remove max_zone_append_size logic"), but without the unnecessary ASSERT
+and check. The max_zone_append_size will be used as a hint to estimate the
+number of extents needed to cover the delalloc/writeback region in later
+commits.
+
+The size of a ZONE APPEND bio is also limited by queue_max_segments(), so
+this commit takes that into account when calculating max_zone_append_size.
+Technically, a bio can be larger than queue_max_segments() * PAGE_SIZE if
+the pages are contiguous. But it is safe to use
+"queue_max_segments() * PAGE_SIZE" as an upper limit on the extent size
+when calculating the number of extents needed to write data.
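+
+Editorial illustration with assumed device limits (not from the upstream
+commit): with bdev_max_zone_append_sectors() = 1024 (i.e. 512K) and
+bdev_max_segments() = 64 on 4K pages (i.e. 256K), the resulting
+max_zone_append_size is min(512K, 256K) = 256K.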
+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ctree.h | 2 ++
+ fs/btrfs/zoned.c | 17 +++++++++++++++++
+ fs/btrfs/zoned.h | 1 +
+ 3 files changed, 20 insertions(+)
+
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -1017,6 +1017,8 @@ struct btrfs_fs_info {
+ u64 zoned;
+ };
+
++ /* Max size to emit ZONE_APPEND write command */
++ u64 max_zone_append_size;
+ struct mutex zoned_meta_io_lock;
+ spinlock_t treelog_bg_lock;
+ u64 treelog_bg;
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -386,6 +386,16 @@ int btrfs_get_dev_zone_info(struct btrfs
+ nr_sectors = bdev_nr_sectors(bdev);
+ zone_info->zone_size_shift = ilog2(zone_info->zone_size);
+ zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors);
++ /*
++ * We limit max_zone_append_size also by max_segments *
++ * PAGE_SIZE. Technically, we can have multiple pages per segment. But,
++ * since btrfs adds the pages one by one to a bio, and btrfs cannot
++ * increase the metadata reservation even if it increases the number of
++ * extents, it is safe to stick with the limit.
++ */
++ zone_info->max_zone_append_size =
++ min_t(u64, (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT,
++ (u64)bdev_max_segments(bdev) << PAGE_SHIFT);
+ if (!IS_ALIGNED(nr_sectors, zone_sectors))
+ zone_info->nr_zones++;
+
+@@ -570,6 +580,7 @@ int btrfs_check_zoned_mode(struct btrfs_
+ u64 zoned_devices = 0;
+ u64 nr_devices = 0;
+ u64 zone_size = 0;
++ u64 max_zone_append_size = 0;
+ const bool incompat_zoned = btrfs_fs_incompat(fs_info, ZONED);
+ int ret = 0;
+
+@@ -605,6 +616,11 @@ int btrfs_check_zoned_mode(struct btrfs_
+ ret = -EINVAL;
+ goto out;
+ }
++ if (!max_zone_append_size ||
++ (zone_info->max_zone_append_size &&
++ zone_info->max_zone_append_size < max_zone_append_size))
++ max_zone_append_size =
++ zone_info->max_zone_append_size;
+ }
+ nr_devices++;
+ }
+@@ -654,6 +670,7 @@ int btrfs_check_zoned_mode(struct btrfs_
+ }
+
+ fs_info->zone_size = zone_size;
++ fs_info->max_zone_append_size = max_zone_append_size;
+ fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED;
+
+ /*
+--- a/fs/btrfs/zoned.h
++++ b/fs/btrfs/zoned.h
+@@ -23,6 +23,7 @@ struct btrfs_zoned_device_info {
+ */
+ u64 zone_size;
+ u8 zone_size_shift;
++ u64 max_zone_append_size;
+ u32 nr_zones;
+ unsigned long *seq_zones;
+ unsigned long *empty_zones;
parisc-fix-exception-handler-for-fldw-and-fstw-instructions.patch
kernel-sys_ni-add-compat-entry-for-fadvise64_64.patch
x86-entry-move-cld-to-the-start-of-the-idtentry-macro.patch
+block-add-a-bdev_max_zone_append_sectors-helper.patch
+block-add-bdev_max_segments-helper.patch
+btrfs-zoned-revive-max_zone_append_bytes.patch
+btrfs-replace-btrfs_max_extent_size-with-fs_info-max_extent_size.patch
+btrfs-convert-count_max_extents-to-use-fs_info-max_extent_size.patch