From: Greg Kroah-Hartman Date: Thu, 25 Aug 2022 12:09:11 +0000 (+0200) Subject: 5.15-stable patches X-Git-Tag: v5.10.140~40 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=69302b1dc403da0567f46a2212fb276539f0c130;p=thirdparty%2Fkernel%2Fstable-queue.git 5.15-stable patches added patches: block-add-a-bdev_max_zone_append_sectors-helper.patch block-add-bdev_max_segments-helper.patch btrfs-convert-count_max_extents-to-use-fs_info-max_extent_size.patch btrfs-replace-btrfs_max_extent_size-with-fs_info-max_extent_size.patch btrfs-zoned-revive-max_zone_append_bytes.patch --- diff --git a/queue-5.15/block-add-a-bdev_max_zone_append_sectors-helper.patch b/queue-5.15/block-add-a-bdev_max_zone_append_sectors-helper.patch new file mode 100644 index 00000000000..2c9bb4fdaa1 --- /dev/null +++ b/queue-5.15/block-add-a-bdev_max_zone_append_sectors-helper.patch @@ -0,0 +1,74 @@ +From foo@baz Thu Aug 25 02:07:49 PM CEST 2022 +From: Naohiro Aota +Date: Mon, 22 Aug 2022 15:07:00 +0900 +Subject: block: add a bdev_max_zone_append_sectors helper +To: linux-btrfs@vger.kernel.org, stable@vger.kernel.org +Cc: Christoph Hellwig , Damien Le Moal , "Martin K . Petersen" , Johannes Thumshirn , Chaitanya Kulkarni , Jens Axboe +Message-ID: <20220822060704.1278361-2-naohiro.aota@wdc.com> + +From: Christoph Hellwig + +commit 2aba0d19f4d8c8929b4b3b94a9cfde2aa20e6ee2 upstream + +Add a helper to check the max supported sectors for zone append based on +the block_device instead of having to poke into the block layer internal +request_queue. + +Signed-off-by: Christoph Hellwig +Acked-by: Damien Le Moal +Reviewed-by: Martin K. 
Petersen +Reviewed-by: Johannes Thumshirn +Reviewed-by: Chaitanya Kulkarni +Link: https://lore.kernel.org/r/20220415045258.199825-16-hch@lst.de +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + drivers/nvme/target/zns.c | 3 +-- + fs/zonefs/super.c | 3 +-- + include/linux/blkdev.h | 6 ++++++ + 3 files changed, 8 insertions(+), 4 deletions(-) + +--- a/drivers/nvme/target/zns.c ++++ b/drivers/nvme/target/zns.c +@@ -34,8 +34,7 @@ static int validate_conv_zones_cb(struct + + bool nvmet_bdev_zns_enable(struct nvmet_ns *ns) + { +- struct request_queue *q = ns->bdev->bd_disk->queue; +- u8 zasl = nvmet_zasl(queue_max_zone_append_sectors(q)); ++ u8 zasl = nvmet_zasl(bdev_max_zone_append_sectors(ns->bdev)); + struct gendisk *bd_disk = ns->bdev->bd_disk; + int ret; + +--- a/fs/zonefs/super.c ++++ b/fs/zonefs/super.c +@@ -723,13 +723,12 @@ static ssize_t zonefs_file_dio_append(st + struct inode *inode = file_inode(iocb->ki_filp); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + struct block_device *bdev = inode->i_sb->s_bdev; +- unsigned int max; ++ unsigned int max = bdev_max_zone_append_sectors(bdev); + struct bio *bio; + ssize_t size; + int nr_pages; + ssize_t ret; + +- max = queue_max_zone_append_sectors(bdev_get_queue(bdev)); + max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize); + iov_iter_truncate(from, max); + +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -1387,6 +1387,12 @@ static inline unsigned int queue_max_zon + return min(l->max_zone_append_sectors, l->max_sectors); + } + ++static inline unsigned int ++bdev_max_zone_append_sectors(struct block_device *bdev) ++{ ++ return queue_max_zone_append_sectors(bdev_get_queue(bdev)); ++} ++ + static inline unsigned queue_logical_block_size(const struct request_queue *q) + { + int retval = 512; diff --git a/queue-5.15/block-add-bdev_max_segments-helper.patch b/queue-5.15/block-add-bdev_max_segments-helper.patch new file mode 100644 index 00000000000..13fe551460d --- 
/dev/null +++ b/queue-5.15/block-add-bdev_max_segments-helper.patch @@ -0,0 +1,38 @@ +From foo@baz Thu Aug 25 02:07:49 PM CEST 2022 +From: Naohiro Aota +Date: Mon, 22 Aug 2022 15:07:01 +0900 +Subject: block: add bdev_max_segments() helper +To: linux-btrfs@vger.kernel.org, stable@vger.kernel.org +Cc: Naohiro Aota , Johannes Thumshirn , Jens Axboe , Christoph Hellwig , David Sterba +Message-ID: <20220822060704.1278361-3-naohiro.aota@wdc.com> + +From: Naohiro Aota + +commit 65ea1b66482f415d51cd46515b02477257330339 upstream + +Add bdev_max_segments() like other queue parameters. + +Reviewed-by: Johannes Thumshirn +Reviewed-by: Jens Axboe +Reviewed-by: Christoph Hellwig +Signed-off-by: Naohiro Aota +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/blkdev.h | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -1393,6 +1393,11 @@ bdev_max_zone_append_sectors(struct bloc + return queue_max_zone_append_sectors(bdev_get_queue(bdev)); + } + ++static inline unsigned int bdev_max_segments(struct block_device *bdev) ++{ ++ return queue_max_segments(bdev_get_queue(bdev)); ++} ++ + static inline unsigned queue_logical_block_size(const struct request_queue *q) + { + int retval = 512; diff --git a/queue-5.15/btrfs-convert-count_max_extents-to-use-fs_info-max_extent_size.patch b/queue-5.15/btrfs-convert-count_max_extents-to-use-fs_info-max_extent_size.patch new file mode 100644 index 00000000000..d2cc545e228 --- /dev/null +++ b/queue-5.15/btrfs-convert-count_max_extents-to-use-fs_info-max_extent_size.patch @@ -0,0 +1,145 @@ +From foo@baz Thu Aug 25 02:07:49 PM CEST 2022 +From: Naohiro Aota +Date: Mon, 22 Aug 2022 15:07:04 +0900 +Subject: btrfs: convert count_max_extents() to use fs_info->max_extent_size +To: linux-btrfs@vger.kernel.org, stable@vger.kernel.org +Cc: Naohiro Aota , David Sterba +Message-ID: <20220822060704.1278361-6-naohiro.aota@wdc.com> + +From: Naohiro Aota + +commit 
7d7672bc5d1038c745716c397d892d21e29de71c upstream + +If count_max_extents() uses BTRFS_MAX_EXTENT_SIZE to calculate the number +of extents needed, btrfs release the metadata reservation too much on its +way to write out the data. + +Now that BTRFS_MAX_EXTENT_SIZE is replaced with fs_info->max_extent_size, +convert count_max_extents() to use it instead, and fix the calculation of +the metadata reservation. + +CC: stable@vger.kernel.org # 5.12+ +Fixes: d8e3fb106f39 ("btrfs: zoned: use ZONE_APPEND write for zoned mode") +Signed-off-by: Naohiro Aota +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ctree.h | 21 +++++++++++++-------- + fs/btrfs/delalloc-space.c | 6 +++--- + fs/btrfs/inode.c | 16 ++++++++-------- + 3 files changed, 24 insertions(+), 19 deletions(-) + +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -105,14 +105,6 @@ struct btrfs_ref; + #define BTRFS_STAT_CURR 0 + #define BTRFS_STAT_PREV 1 + +-/* +- * Count how many BTRFS_MAX_EXTENT_SIZE cover the @size +- */ +-static inline u32 count_max_extents(u64 size) +-{ +- return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE); +-} +- + static inline unsigned long btrfs_chunk_item_size(int num_stripes) + { + BUG_ON(num_stripes == 0); +@@ -3878,6 +3870,19 @@ static inline bool btrfs_is_zoned(const + return fs_info->zoned != 0; + } + ++/* ++ * Count how many fs_info->max_extent_size cover the @size ++ */ ++static inline u32 count_max_extents(struct btrfs_fs_info *fs_info, u64 size) ++{ ++#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS ++ if (!fs_info) ++ return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE); ++#endif ++ ++ return div_u64(size + fs_info->max_extent_size - 1, fs_info->max_extent_size); ++} ++ + static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root) + { + return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID; +--- a/fs/btrfs/delalloc-space.c ++++ b/fs/btrfs/delalloc-space.c +@@ -273,7 +273,7 @@ static void 
calc_inode_reservations(stru + u64 num_bytes, u64 *meta_reserve, + u64 *qgroup_reserve) + { +- u64 nr_extents = count_max_extents(num_bytes); ++ u64 nr_extents = count_max_extents(fs_info, num_bytes); + u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes); + u64 inode_update = btrfs_calc_metadata_size(fs_info, 1); + +@@ -347,7 +347,7 @@ int btrfs_delalloc_reserve_metadata(stru + * needs to free the reservation we just made. + */ + spin_lock(&inode->lock); +- nr_extents = count_max_extents(num_bytes); ++ nr_extents = count_max_extents(fs_info, num_bytes); + btrfs_mod_outstanding_extents(inode, nr_extents); + inode->csum_bytes += num_bytes; + btrfs_calculate_inode_block_rsv_size(fs_info, inode); +@@ -410,7 +410,7 @@ void btrfs_delalloc_release_extents(stru + unsigned num_extents; + + spin_lock(&inode->lock); +- num_extents = count_max_extents(num_bytes); ++ num_extents = count_max_extents(fs_info, num_bytes); + btrfs_mod_outstanding_extents(inode, -num_extents); + btrfs_calculate_inode_block_rsv_size(fs_info, inode); + spin_unlock(&inode->lock); +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -2049,10 +2049,10 @@ void btrfs_split_delalloc_extent(struct + * applies here, just in reverse. + */ + new_size = orig->end - split + 1; +- num_extents = count_max_extents(new_size); ++ num_extents = count_max_extents(fs_info, new_size); + new_size = split - orig->start; +- num_extents += count_max_extents(new_size); +- if (count_max_extents(size) >= num_extents) ++ num_extents += count_max_extents(fs_info, new_size); ++ if (count_max_extents(fs_info, size) >= num_extents) + return; + } + +@@ -2109,10 +2109,10 @@ void btrfs_merge_delalloc_extent(struct + * this case. 
+ */ + old_size = other->end - other->start + 1; +- num_extents = count_max_extents(old_size); ++ num_extents = count_max_extents(fs_info, old_size); + old_size = new->end - new->start + 1; +- num_extents += count_max_extents(old_size); +- if (count_max_extents(new_size) >= num_extents) ++ num_extents += count_max_extents(fs_info, old_size); ++ if (count_max_extents(fs_info, new_size) >= num_extents) + return; + + spin_lock(&BTRFS_I(inode)->lock); +@@ -2191,7 +2191,7 @@ void btrfs_set_delalloc_extent(struct in + if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 len = state->end + 1 - state->start; +- u32 num_extents = count_max_extents(len); ++ u32 num_extents = count_max_extents(fs_info, len); + bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode)); + + spin_lock(&BTRFS_I(inode)->lock); +@@ -2233,7 +2233,7 @@ void btrfs_clear_delalloc_extent(struct + struct btrfs_inode *inode = BTRFS_I(vfs_inode); + struct btrfs_fs_info *fs_info = btrfs_sb(vfs_inode->i_sb); + u64 len = state->end + 1 - state->start; +- u32 num_extents = count_max_extents(len); ++ u32 num_extents = count_max_extents(fs_info, len); + + if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) { + spin_lock(&inode->lock); diff --git a/queue-5.15/btrfs-replace-btrfs_max_extent_size-with-fs_info-max_extent_size.patch b/queue-5.15/btrfs-replace-btrfs_max_extent_size-with-fs_info-max_extent_size.patch new file mode 100644 index 00000000000..666647d4144 --- /dev/null +++ b/queue-5.15/btrfs-replace-btrfs_max_extent_size-with-fs_info-max_extent_size.patch @@ -0,0 +1,210 @@ +From foo@baz Thu Aug 25 02:07:49 PM CEST 2022 +From: Naohiro Aota +Date: Mon, 22 Aug 2022 15:07:03 +0900 +Subject: btrfs: replace BTRFS_MAX_EXTENT_SIZE with fs_info->max_extent_size +To: linux-btrfs@vger.kernel.org, stable@vger.kernel.org +Cc: Naohiro Aota , Johannes Thumshirn , David Sterba +Message-ID: <20220822060704.1278361-5-naohiro.aota@wdc.com> + 
+From: Naohiro Aota + +commit f7b12a62f008a3041f42f2426983e59a6a0a3c59 upstream + +On zoned filesystem, data write out is limited by max_zone_append_size, +and a large ordered extent is split according the size of a bio. OTOH, +the number of extents to be written is calculated using +BTRFS_MAX_EXTENT_SIZE, and that estimated number is used to reserve the +metadata bytes to update and/or create the metadata items. + +The metadata reservation is done at e.g, btrfs_buffered_write() and then +released according to the estimation changes. Thus, if the number of extent +increases massively, the reserved metadata can run out. + +The increase of the number of extents easily occurs on zoned filesystem +if BTRFS_MAX_EXTENT_SIZE > max_zone_append_size. And, it causes the +following warning on a small RAM environment with disabling metadata +over-commit (in the following patch). + +[75721.498492] ------------[ cut here ]------------ +[75721.505624] BTRFS: block rsv 1 returned -28 +[75721.512230] WARNING: CPU: 24 PID: 2327559 at fs/btrfs/block-rsv.c:537 btrfs_use_block_rsv+0x560/0x760 [btrfs] +[75721.581854] CPU: 24 PID: 2327559 Comm: kworker/u64:10 Kdump: loaded Tainted: G W 5.18.0-rc2-BTRFS-ZNS+ #109 +[75721.597200] Hardware name: Supermicro Super Server/H12SSL-NT, BIOS 2.0 02/22/2021 +[75721.607310] Workqueue: btrfs-endio-write btrfs_work_helper [btrfs] +[75721.616209] RIP: 0010:btrfs_use_block_rsv+0x560/0x760 [btrfs] +[75721.646649] RSP: 0018:ffffc9000fbdf3e0 EFLAGS: 00010286 +[75721.654126] RAX: 0000000000000000 RBX: 0000000000004000 RCX: 0000000000000000 +[75721.663524] RDX: 0000000000000004 RSI: 0000000000000008 RDI: fffff52001f7be6e +[75721.672921] RBP: ffffc9000fbdf420 R08: 0000000000000001 R09: ffff889f8d1fc6c7 +[75721.682493] R10: ffffed13f1a3f8d8 R11: 0000000000000001 R12: ffff88980a3c0e28 +[75721.692284] R13: ffff889b66590000 R14: ffff88980a3c0e40 R15: ffff88980a3c0e8a +[75721.701878] FS: 0000000000000000(0000) GS:ffff889f8d000000(0000) knlGS:0000000000000000 
+[75721.712601] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[75721.720726] CR2: 000055d12e05c018 CR3: 0000800193594000 CR4: 0000000000350ee0 +[75721.730499] Call Trace: +[75721.735166] <TASK> +[75721.739886] btrfs_alloc_tree_block+0x1e1/0x1100 [btrfs] +[75721.747545] ? btrfs_alloc_logged_file_extent+0x550/0x550 [btrfs] +[75721.756145] ? btrfs_get_32+0xea/0x2d0 [btrfs] +[75721.762852] ? btrfs_get_32+0xea/0x2d0 [btrfs] +[75721.769520] ? push_leaf_left+0x420/0x620 [btrfs] +[75721.776431] ? memcpy+0x4e/0x60 +[75721.781931] split_leaf+0x433/0x12d0 [btrfs] +[75721.788392] ? btrfs_get_token_32+0x580/0x580 [btrfs] +[75721.795636] ? push_for_double_split.isra.0+0x420/0x420 [btrfs] +[75721.803759] ? leaf_space_used+0x15d/0x1a0 [btrfs] +[75721.811156] btrfs_search_slot+0x1bc3/0x2790 [btrfs] +[75721.818300] ? lock_downgrade+0x7c0/0x7c0 +[75721.824411] ? free_extent_buffer.part.0+0x107/0x200 [btrfs] +[75721.832456] ? split_leaf+0x12d0/0x12d0 [btrfs] +[75721.839149] ? free_extent_buffer.part.0+0x14f/0x200 [btrfs] +[75721.846945] ? free_extent_buffer+0x13/0x20 [btrfs] +[75721.853960] ? btrfs_release_path+0x4b/0x190 [btrfs] +[75721.861429] btrfs_csum_file_blocks+0x85c/0x1500 [btrfs] +[75721.869313] ? rcu_read_lock_sched_held+0x16/0x80 +[75721.876085] ? lock_release+0x552/0xf80 +[75721.881957] ? btrfs_del_csums+0x8c0/0x8c0 [btrfs] +[75721.888886] ? __kasan_check_write+0x14/0x20 +[75721.895152] ? do_raw_read_unlock+0x44/0x80 +[75721.901323] ? _raw_write_lock_irq+0x60/0x80 +[75721.907983] ? btrfs_global_root+0xb9/0xe0 [btrfs] +[75721.915166] ? btrfs_csum_root+0x12b/0x180 [btrfs] +[75721.921918] ? btrfs_get_global_root+0x820/0x820 [btrfs] +[75721.929166] ? _raw_write_unlock+0x23/0x40 +[75721.935116] ? unpin_extent_cache+0x1e3/0x390 [btrfs] +[75721.942041] btrfs_finish_ordered_io.isra.0+0xa0c/0x1dc0 [btrfs] +[75721.949906] ? try_to_wake_up+0x30/0x14a0 +[75721.955700] ? btrfs_unlink_subvol+0xda0/0xda0 [btrfs] +[75721.962661] ? rcu_read_lock_sched_held+0x16/0x80 +[75721.969111] ? 
lock_acquire+0x41b/0x4c0 +[75721.974982] finish_ordered_fn+0x15/0x20 [btrfs] +[75721.981639] btrfs_work_helper+0x1af/0xa80 [btrfs] +[75721.988184] ? _raw_spin_unlock_irq+0x28/0x50 +[75721.994643] process_one_work+0x815/0x1460 +[75722.000444] ? pwq_dec_nr_in_flight+0x250/0x250 +[75722.006643] ? do_raw_spin_trylock+0xbb/0x190 +[75722.013086] worker_thread+0x59a/0xeb0 +[75722.018511] kthread+0x2ac/0x360 +[75722.023428] ? process_one_work+0x1460/0x1460 +[75722.029431] ? kthread_complete_and_exit+0x30/0x30 +[75722.036044] ret_from_fork+0x22/0x30 +[75722.041255] </TASK> +[75722.045047] irq event stamp: 0 +[75722.049703] hardirqs last enabled at (0): [<0000000000000000>] 0x0 +[75722.057610] hardirqs last disabled at (0): [] copy_process+0x1c1a/0x66b0 +[75722.067533] softirqs last enabled at (0): [] copy_process+0x1c59/0x66b0 +[75722.077423] softirqs last disabled at (0): [<0000000000000000>] 0x0 +[75722.085335] ---[ end trace 0000000000000000 ]--- + +To fix the estimation, we need to introduce fs_info->max_extent_size to +replace BTRFS_MAX_EXTENT_SIZE, which allow setting the different size for +regular vs zoned filesystem. + +Set fs_info->max_extent_size to BTRFS_MAX_EXTENT_SIZE by default. On zoned +filesystem, it is set to fs_info->max_zone_append_size. + +CC: stable@vger.kernel.org # 5.12+ +Fixes: d8e3fb106f39 ("btrfs: zoned: use ZONE_APPEND write for zoned mode") +Reviewed-by: Johannes Thumshirn +Signed-off-by: Naohiro Aota +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ctree.h | 6 ++++++ + fs/btrfs/disk-io.c | 2 ++ + fs/btrfs/extent_io.c | 4 +++- + fs/btrfs/inode.c | 6 ++++-- + fs/btrfs/zoned.c | 5 ++++- + 5 files changed, 19 insertions(+), 4 deletions(-) + +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -999,6 +999,12 @@ struct btrfs_fs_info { + u32 csums_per_leaf; + u32 stripesize; + ++ /* ++ * Maximum size of an extent. BTRFS_MAX_EXTENT_SIZE on regular ++ * filesystem, on zoned it depends on the device constraints. 
++ */ ++ u64 max_extent_size; ++ + /* Block groups and devices containing active swapfiles. */ + spinlock_t swapfile_pins_lock; + struct rb_root swapfile_pins; +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -3006,6 +3006,8 @@ void btrfs_init_fs_info(struct btrfs_fs_ + fs_info->sectorsize_bits = ilog2(4096); + fs_info->stripesize = 4096; + ++ fs_info->max_extent_size = BTRFS_MAX_EXTENT_SIZE; ++ + spin_lock_init(&fs_info->swapfile_pins_lock); + fs_info->swapfile_pins = RB_ROOT; + +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -1985,8 +1985,10 @@ noinline_for_stack bool find_lock_delall + struct page *locked_page, u64 *start, + u64 *end) + { ++ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; +- u64 max_bytes = BTRFS_MAX_EXTENT_SIZE; ++ /* The sanity tests may not set a valid fs_info. */ ++ u64 max_bytes = fs_info ? fs_info->max_extent_size : BTRFS_MAX_EXTENT_SIZE; + u64 delalloc_start; + u64 delalloc_end; + bool found; +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -2032,6 +2032,7 @@ int btrfs_run_delalloc_range(struct btrf + void btrfs_split_delalloc_extent(struct inode *inode, + struct extent_state *orig, u64 split) + { ++ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + u64 size; + + /* not delalloc, ignore it */ +@@ -2039,7 +2040,7 @@ void btrfs_split_delalloc_extent(struct + return; + + size = orig->end - orig->start + 1; +- if (size > BTRFS_MAX_EXTENT_SIZE) { ++ if (size > fs_info->max_extent_size) { + u32 num_extents; + u64 new_size; + +@@ -2068,6 +2069,7 @@ void btrfs_split_delalloc_extent(struct + void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new, + struct extent_state *other) + { ++ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + u64 new_size, old_size; + u32 num_extents; + +@@ -2081,7 +2083,7 @@ void btrfs_merge_delalloc_extent(struct + new_size = other->end - new->start + 1; + + /* we're not bigger than the max, 
unreserve the space and go */ +- if (new_size <= BTRFS_MAX_EXTENT_SIZE) { ++ if (new_size <= fs_info->max_extent_size) { + spin_lock(&BTRFS_I(inode)->lock); + btrfs_mod_outstanding_extents(BTRFS_I(inode), -1); + spin_unlock(&BTRFS_I(inode)->lock); +--- a/fs/btrfs/zoned.c ++++ b/fs/btrfs/zoned.c +@@ -670,8 +670,11 @@ int btrfs_check_zoned_mode(struct btrfs_ + } + + fs_info->zone_size = zone_size; +- fs_info->max_zone_append_size = max_zone_append_size; ++ fs_info->max_zone_append_size = ALIGN_DOWN(max_zone_append_size, ++ fs_info->sectorsize); + fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED; ++ if (fs_info->max_zone_append_size < fs_info->max_extent_size) ++ fs_info->max_extent_size = fs_info->max_zone_append_size; + + /* + * Check mount options here, because we might change fs_info->zoned diff --git a/queue-5.15/btrfs-zoned-revive-max_zone_append_bytes.patch b/queue-5.15/btrfs-zoned-revive-max_zone_append_bytes.patch new file mode 100644 index 00000000000..ad77cb88ef7 --- /dev/null +++ b/queue-5.15/btrfs-zoned-revive-max_zone_append_bytes.patch @@ -0,0 +1,102 @@ +From foo@baz Thu Aug 25 02:07:49 PM CEST 2022 +From: Naohiro Aota +Date: Mon, 22 Aug 2022 15:07:02 +0900 +Subject: btrfs: zoned: revive max_zone_append_bytes +To: linux-btrfs@vger.kernel.org, stable@vger.kernel.org +Cc: Naohiro Aota , Johannes Thumshirn , David Sterba +Message-ID: <20220822060704.1278361-4-naohiro.aota@wdc.com> + +From: Naohiro Aota + +commit c2ae7b772ef4e86c5ddf3fd47bf59045ae96a414 upstream + +This patch is basically a revert of commit 5a80d1c6a270 ("btrfs: zoned: +remove max_zone_append_size logic"), but without unnecessary ASSERT and +check. The max_zone_append_size will be used as a hint to estimate the +number of extents to cover delalloc/writeback region in the later commits. + +The size of a ZONE APPEND bio is also limited by queue_max_segments(), so +this commit considers it to calculate max_zone_append_size. 
Technically, a +bio can be larger than queue_max_segments() * PAGE_SIZE if the pages are +contiguous. But, it is safe to consider "queue_max_segments() * PAGE_SIZE" +as an upper limit of an extent size to calculate the number of extents +needed to write data. + +Reviewed-by: Johannes Thumshirn +Signed-off-by: Naohiro Aota +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ctree.h | 2 ++ + fs/btrfs/zoned.c | 17 +++++++++++++++++ + fs/btrfs/zoned.h | 1 + + 3 files changed, 20 insertions(+) + +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -1017,6 +1017,8 @@ struct btrfs_fs_info { + u64 zoned; + }; + ++ /* Max size to emit ZONE_APPEND write command */ ++ u64 max_zone_append_size; + struct mutex zoned_meta_io_lock; + spinlock_t treelog_bg_lock; + u64 treelog_bg; +--- a/fs/btrfs/zoned.c ++++ b/fs/btrfs/zoned.c +@@ -386,6 +386,16 @@ int btrfs_get_dev_zone_info(struct btrfs + nr_sectors = bdev_nr_sectors(bdev); + zone_info->zone_size_shift = ilog2(zone_info->zone_size); + zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors); ++ /* ++ * We limit max_zone_append_size also by max_segments * ++ * PAGE_SIZE. Technically, we can have multiple pages per segment. But, ++ * since btrfs adds the pages one by one to a bio, and btrfs cannot ++ * increase the metadata reservation even if it increases the number of ++ * extents, it is safe to stick with the limit. 
++ */ ++ zone_info->max_zone_append_size = ++ min_t(u64, (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT, ++ (u64)bdev_max_segments(bdev) << PAGE_SHIFT); + if (!IS_ALIGNED(nr_sectors, zone_sectors)) + zone_info->nr_zones++; + +@@ -570,6 +580,7 @@ int btrfs_check_zoned_mode(struct btrfs_ + u64 zoned_devices = 0; + u64 nr_devices = 0; + u64 zone_size = 0; ++ u64 max_zone_append_size = 0; + const bool incompat_zoned = btrfs_fs_incompat(fs_info, ZONED); + int ret = 0; + +@@ -605,6 +616,11 @@ int btrfs_check_zoned_mode(struct btrfs_ + ret = -EINVAL; + goto out; + } ++ if (!max_zone_append_size || ++ (zone_info->max_zone_append_size && ++ zone_info->max_zone_append_size < max_zone_append_size)) ++ max_zone_append_size = ++ zone_info->max_zone_append_size; + } + nr_devices++; + } +@@ -654,6 +670,7 @@ int btrfs_check_zoned_mode(struct btrfs_ + } + + fs_info->zone_size = zone_size; ++ fs_info->max_zone_append_size = max_zone_append_size; + fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED; + + /* +--- a/fs/btrfs/zoned.h ++++ b/fs/btrfs/zoned.h +@@ -23,6 +23,7 @@ struct btrfs_zoned_device_info { + */ + u64 zone_size; + u8 zone_size_shift; ++ u64 max_zone_append_size; + u32 nr_zones; + unsigned long *seq_zones; + unsigned long *empty_zones; diff --git a/queue-5.15/series b/queue-5.15/series index 12e2927d659..469c0f6cc1e 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -6,3 +6,8 @@ parisc-make-config_64bit-available-for-arch-parisc64-only.patch parisc-fix-exception-handler-for-fldw-and-fstw-instructions.patch kernel-sys_ni-add-compat-entry-for-fadvise64_64.patch x86-entry-move-cld-to-the-start-of-the-idtentry-macro.patch +block-add-a-bdev_max_zone_append_sectors-helper.patch +block-add-bdev_max_segments-helper.patch +btrfs-zoned-revive-max_zone_append_bytes.patch +btrfs-replace-btrfs_max_extent_size-with-fs_info-max_extent_size.patch +btrfs-convert-count_max_extents-to-use-fs_info-max_extent_size.patch