git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.15-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 25 Aug 2022 12:09:11 +0000 (14:09 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 25 Aug 2022 12:09:11 +0000 (14:09 +0200)
added patches:
block-add-a-bdev_max_zone_append_sectors-helper.patch
block-add-bdev_max_segments-helper.patch
btrfs-convert-count_max_extents-to-use-fs_info-max_extent_size.patch
btrfs-replace-btrfs_max_extent_size-with-fs_info-max_extent_size.patch
btrfs-zoned-revive-max_zone_append_bytes.patch

queue-5.15/block-add-a-bdev_max_zone_append_sectors-helper.patch [new file with mode: 0644]
queue-5.15/block-add-bdev_max_segments-helper.patch [new file with mode: 0644]
queue-5.15/btrfs-convert-count_max_extents-to-use-fs_info-max_extent_size.patch [new file with mode: 0644]
queue-5.15/btrfs-replace-btrfs_max_extent_size-with-fs_info-max_extent_size.patch [new file with mode: 0644]
queue-5.15/btrfs-zoned-revive-max_zone_append_bytes.patch [new file with mode: 0644]
queue-5.15/series

diff --git a/queue-5.15/block-add-a-bdev_max_zone_append_sectors-helper.patch b/queue-5.15/block-add-a-bdev_max_zone_append_sectors-helper.patch
new file mode 100644 (file)
index 0000000..2c9bb4f
--- /dev/null
@@ -0,0 +1,74 @@
+From foo@baz Thu Aug 25 02:07:49 PM CEST 2022
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Mon, 22 Aug 2022 15:07:00 +0900
+Subject: block: add a bdev_max_zone_append_sectors helper
+To: linux-btrfs@vger.kernel.org, stable@vger.kernel.org
+Cc: Christoph Hellwig <hch@lst.de>, Damien Le Moal <damien.lemoal@opensource.wdc.com>, "Martin K . Petersen" <martin.petersen@oracle.com>, Johannes Thumshirn <johannes.thumshirn@wdc.com>, Chaitanya Kulkarni <kch@nvidia.com>, Jens Axboe <axboe@kernel.dk>
+Message-ID: <20220822060704.1278361-2-naohiro.aota@wdc.com>
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 2aba0d19f4d8c8929b4b3b94a9cfde2aa20e6ee2 upstream
+
+Add a helper to check the max supported sectors for zone append based on
+the block_device instead of having to poke into the block layer internal
+request_queue.
+
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
+Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
+Link: https://lore.kernel.org/r/20220415045258.199825-16-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/nvme/target/zns.c |    3 +--
+ fs/zonefs/super.c         |    3 +--
+ include/linux/blkdev.h    |    6 ++++++
+ 3 files changed, 8 insertions(+), 4 deletions(-)
+
+--- a/drivers/nvme/target/zns.c
++++ b/drivers/nvme/target/zns.c
+@@ -34,8 +34,7 @@ static int validate_conv_zones_cb(struct
+ bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
+ {
+-      struct request_queue *q = ns->bdev->bd_disk->queue;
+-      u8 zasl = nvmet_zasl(queue_max_zone_append_sectors(q));
++      u8 zasl = nvmet_zasl(bdev_max_zone_append_sectors(ns->bdev));
+       struct gendisk *bd_disk = ns->bdev->bd_disk;
+       int ret;
+--- a/fs/zonefs/super.c
++++ b/fs/zonefs/super.c
+@@ -723,13 +723,12 @@ static ssize_t zonefs_file_dio_append(st
+       struct inode *inode = file_inode(iocb->ki_filp);
+       struct zonefs_inode_info *zi = ZONEFS_I(inode);
+       struct block_device *bdev = inode->i_sb->s_bdev;
+-      unsigned int max;
++      unsigned int max = bdev_max_zone_append_sectors(bdev);
+       struct bio *bio;
+       ssize_t size;
+       int nr_pages;
+       ssize_t ret;
+-      max = queue_max_zone_append_sectors(bdev_get_queue(bdev));
+       max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize);
+       iov_iter_truncate(from, max);
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -1387,6 +1387,12 @@ static inline unsigned int queue_max_zon
+       return min(l->max_zone_append_sectors, l->max_sectors);
+ }
++static inline unsigned int
++bdev_max_zone_append_sectors(struct block_device *bdev)
++{
++      return queue_max_zone_append_sectors(bdev_get_queue(bdev));
++}
++
+ static inline unsigned queue_logical_block_size(const struct request_queue *q)
+ {
+       int retval = 512;
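
What the zonefs hunk above boils down to: read the per-device sector limit
through the new helper, convert it to bytes, and round the result down to the
filesystem block size before truncating the iov_iter. A minimal userspace
sketch of that arithmetic follows; the limits used are assumed example values
and ALIGN_DOWN is re-implemented here in a simplified form.

/*
 * Sketch of the byte-limit math in zonefs_file_dio_append(); the device and
 * block-size numbers are made-up examples, not real queue limits.
 */
#include <stdio.h>

#define SECTOR_SHIFT 9                        /* 512-byte sectors */
#define ALIGN_DOWN(x, a) ((x) - ((x) % (a)))  /* simplified round-down */

int main(void)
{
    unsigned long long append_sectors = 1024; /* as if from bdev_max_zone_append_sectors() */
    unsigned long long blocksize = 4096;      /* filesystem block size */
    unsigned long long max_bytes =
        ALIGN_DOWN(append_sectors << SECTOR_SHIFT, blocksize);

    /* 1024 sectors * 512 = 524288 bytes, already 4 KiB aligned */
    printf("zone-append write capped at %llu bytes\n", max_bytes);
    return 0;
}
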
diff --git a/queue-5.15/block-add-bdev_max_segments-helper.patch b/queue-5.15/block-add-bdev_max_segments-helper.patch
new file mode 100644 (file)
index 0000000..13fe551
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Thu Aug 25 02:07:49 PM CEST 2022
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Mon, 22 Aug 2022 15:07:01 +0900
+Subject: block: add bdev_max_segments() helper
+To: linux-btrfs@vger.kernel.org, stable@vger.kernel.org
+Cc: Naohiro Aota <naohiro.aota@wdc.com>, Johannes Thumshirn <johannes.thumshirn@wdc.com>, Jens Axboe <axboe@kernel.dk>, Christoph Hellwig <hch@lst.de>, David Sterba <dsterba@suse.com>
+Message-ID: <20220822060704.1278361-3-naohiro.aota@wdc.com>
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit 65ea1b66482f415d51cd46515b02477257330339 upstream
+
+Add bdev_max_segments() like other queue parameters.
+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Reviewed-by: Jens Axboe <axboe@kernel.dk>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/blkdev.h |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -1393,6 +1393,11 @@ bdev_max_zone_append_sectors(struct bloc
+       return queue_max_zone_append_sectors(bdev_get_queue(bdev));
+ }
++static inline unsigned int bdev_max_segments(struct block_device *bdev)
++{
++      return queue_max_segments(bdev_get_queue(bdev));
++}
++
+ static inline unsigned queue_logical_block_size(const struct request_queue *q)
+ {
+       int retval = 512;
diff --git a/queue-5.15/btrfs-convert-count_max_extents-to-use-fs_info-max_extent_size.patch b/queue-5.15/btrfs-convert-count_max_extents-to-use-fs_info-max_extent_size.patch
new file mode 100644 (file)
index 0000000..d2cc545
--- /dev/null
@@ -0,0 +1,145 @@
+From foo@baz Thu Aug 25 02:07:49 PM CEST 2022
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Mon, 22 Aug 2022 15:07:04 +0900
+Subject: btrfs: convert count_max_extents() to use fs_info->max_extent_size
+To: linux-btrfs@vger.kernel.org, stable@vger.kernel.org
+Cc: Naohiro Aota <naohiro.aota@wdc.com>, David Sterba <dsterba@suse.com>
+Message-ID: <20220822060704.1278361-6-naohiro.aota@wdc.com>
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit 7d7672bc5d1038c745716c397d892d21e29de71c upstream
+
+If count_max_extents() uses BTRFS_MAX_EXTENT_SIZE to calculate the number
+of extents needed, btrfs releases too much of the metadata reservation on
+its way to write out the data.
+
+Now that BTRFS_MAX_EXTENT_SIZE is replaced with fs_info->max_extent_size,
+convert count_max_extents() to use it instead, and fix the calculation of
+the metadata reservation.
+
+CC: stable@vger.kernel.org # 5.12+
+Fixes: d8e3fb106f39 ("btrfs: zoned: use ZONE_APPEND write for zoned mode")
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ctree.h          |   21 +++++++++++++--------
+ fs/btrfs/delalloc-space.c |    6 +++---
+ fs/btrfs/inode.c          |   16 ++++++++--------
+ 3 files changed, 24 insertions(+), 19 deletions(-)
+
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -105,14 +105,6 @@ struct btrfs_ref;
+ #define BTRFS_STAT_CURR               0
+ #define BTRFS_STAT_PREV               1
+-/*
+- * Count how many BTRFS_MAX_EXTENT_SIZE cover the @size
+- */
+-static inline u32 count_max_extents(u64 size)
+-{
+-      return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
+-}
+-
+ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
+ {
+       BUG_ON(num_stripes == 0);
+@@ -3878,6 +3870,19 @@ static inline bool btrfs_is_zoned(const
+       return fs_info->zoned != 0;
+ }
++/*
++ * Count how many fs_info->max_extent_size cover the @size
++ */
++static inline u32 count_max_extents(struct btrfs_fs_info *fs_info, u64 size)
++{
++#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
++      if (!fs_info)
++              return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
++#endif
++
++      return div_u64(size + fs_info->max_extent_size - 1, fs_info->max_extent_size);
++}
++
+ static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root)
+ {
+       return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID;
+--- a/fs/btrfs/delalloc-space.c
++++ b/fs/btrfs/delalloc-space.c
+@@ -273,7 +273,7 @@ static void calc_inode_reservations(stru
+                                   u64 num_bytes, u64 *meta_reserve,
+                                   u64 *qgroup_reserve)
+ {
+-      u64 nr_extents = count_max_extents(num_bytes);
++      u64 nr_extents = count_max_extents(fs_info, num_bytes);
+       u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes);
+       u64 inode_update = btrfs_calc_metadata_size(fs_info, 1);
+@@ -347,7 +347,7 @@ int btrfs_delalloc_reserve_metadata(stru
+        * needs to free the reservation we just made.
+        */
+       spin_lock(&inode->lock);
+-      nr_extents = count_max_extents(num_bytes);
++      nr_extents = count_max_extents(fs_info, num_bytes);
+       btrfs_mod_outstanding_extents(inode, nr_extents);
+       inode->csum_bytes += num_bytes;
+       btrfs_calculate_inode_block_rsv_size(fs_info, inode);
+@@ -410,7 +410,7 @@ void btrfs_delalloc_release_extents(stru
+       unsigned num_extents;
+       spin_lock(&inode->lock);
+-      num_extents = count_max_extents(num_bytes);
++      num_extents = count_max_extents(fs_info, num_bytes);
+       btrfs_mod_outstanding_extents(inode, -num_extents);
+       btrfs_calculate_inode_block_rsv_size(fs_info, inode);
+       spin_unlock(&inode->lock);
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -2049,10 +2049,10 @@ void btrfs_split_delalloc_extent(struct
+                * applies here, just in reverse.
+                */
+               new_size = orig->end - split + 1;
+-              num_extents = count_max_extents(new_size);
++              num_extents = count_max_extents(fs_info, new_size);
+               new_size = split - orig->start;
+-              num_extents += count_max_extents(new_size);
+-              if (count_max_extents(size) >= num_extents)
++              num_extents += count_max_extents(fs_info, new_size);
++              if (count_max_extents(fs_info, size) >= num_extents)
+                       return;
+       }
+@@ -2109,10 +2109,10 @@ void btrfs_merge_delalloc_extent(struct
+        * this case.
+        */
+       old_size = other->end - other->start + 1;
+-      num_extents = count_max_extents(old_size);
++      num_extents = count_max_extents(fs_info, old_size);
+       old_size = new->end - new->start + 1;
+-      num_extents += count_max_extents(old_size);
+-      if (count_max_extents(new_size) >= num_extents)
++      num_extents += count_max_extents(fs_info, old_size);
++      if (count_max_extents(fs_info, new_size) >= num_extents)
+               return;
+       spin_lock(&BTRFS_I(inode)->lock);
+@@ -2191,7 +2191,7 @@ void btrfs_set_delalloc_extent(struct in
+       if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
+               struct btrfs_root *root = BTRFS_I(inode)->root;
+               u64 len = state->end + 1 - state->start;
+-              u32 num_extents = count_max_extents(len);
++              u32 num_extents = count_max_extents(fs_info, len);
+               bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode));
+               spin_lock(&BTRFS_I(inode)->lock);
+@@ -2233,7 +2233,7 @@ void btrfs_clear_delalloc_extent(struct
+       struct btrfs_inode *inode = BTRFS_I(vfs_inode);
+       struct btrfs_fs_info *fs_info = btrfs_sb(vfs_inode->i_sb);
+       u64 len = state->end + 1 - state->start;
+-      u32 num_extents = count_max_extents(len);
++      u32 num_extents = count_max_extents(fs_info, len);
+       if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) {
+               spin_lock(&inode->lock);
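
The converted count_max_extents() above is a plain ceiling division of the
delalloc size by fs_info->max_extent_size. Below is a rough userspace
illustration of why the extent count, and with it the metadata reservation,
grows on a zoned filesystem; 128 MiB matches BTRFS_MAX_EXTENT_SIZE, while the
4 MiB zoned limit is only an assumed example value.

/*
 * Sketch of the rounding-up division behind count_max_extents(); the zoned
 * limit below is illustrative, not a real device limit.
 */
#include <stdio.h>

static unsigned int count_max_extents(unsigned long long size,
                                      unsigned long long max_extent_size)
{
    /* same as div_u64(size + max_extent_size - 1, max_extent_size) */
    return (size + max_extent_size - 1) / max_extent_size;
}

int main(void)
{
    unsigned long long write_size = 256ULL << 20; /* 256 MiB delalloc range */
    unsigned long long regular = 128ULL << 20;    /* BTRFS_MAX_EXTENT_SIZE */
    unsigned long long zoned = 4ULL << 20;        /* hypothetical max_zone_append_size */

    printf("regular: %u extents\n", count_max_extents(write_size, regular)); /* 2 */
    printf("zoned:   %u extents\n", count_max_extents(write_size, zoned));   /* 64 */
    return 0;
}
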
diff --git a/queue-5.15/btrfs-replace-btrfs_max_extent_size-with-fs_info-max_extent_size.patch b/queue-5.15/btrfs-replace-btrfs_max_extent_size-with-fs_info-max_extent_size.patch
new file mode 100644 (file)
index 0000000..666647d
--- /dev/null
@@ -0,0 +1,210 @@
+From foo@baz Thu Aug 25 02:07:49 PM CEST 2022
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Mon, 22 Aug 2022 15:07:03 +0900
+Subject: btrfs: replace BTRFS_MAX_EXTENT_SIZE with fs_info->max_extent_size
+To: linux-btrfs@vger.kernel.org, stable@vger.kernel.org
+Cc: Naohiro Aota <naohiro.aota@wdc.com>, Johannes Thumshirn <johannes.thumshirn@wdc.com>, David Sterba <dsterba@suse.com>
+Message-ID: <20220822060704.1278361-5-naohiro.aota@wdc.com>
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit f7b12a62f008a3041f42f2426983e59a6a0a3c59 upstream
+
+On a zoned filesystem, data write-out is limited by max_zone_append_size,
+and a large ordered extent is split according to the size of a bio. OTOH,
+the number of extents to be written is calculated using
+BTRFS_MAX_EXTENT_SIZE, and that estimated number is used to reserve the
+metadata bytes to update and/or create the metadata items.
+
+The metadata reservation is done at e.g. btrfs_buffered_write() and then
+released as the estimate changes. Thus, if the number of extents increases
+massively, the reserved metadata can run out.
+
+Such an increase in the number of extents occurs easily on a zoned
+filesystem if BTRFS_MAX_EXTENT_SIZE > max_zone_append_size, and it causes
+the following warning on a machine with little RAM once metadata
+over-commit is disabled (done in the following patch).
+
+[75721.498492] ------------[ cut here ]------------
+[75721.505624] BTRFS: block rsv 1 returned -28
+[75721.512230] WARNING: CPU: 24 PID: 2327559 at fs/btrfs/block-rsv.c:537 btrfs_use_block_rsv+0x560/0x760 [btrfs]
+[75721.581854] CPU: 24 PID: 2327559 Comm: kworker/u64:10 Kdump: loaded Tainted: G        W         5.18.0-rc2-BTRFS-ZNS+ #109
+[75721.597200] Hardware name: Supermicro Super Server/H12SSL-NT, BIOS 2.0 02/22/2021
+[75721.607310] Workqueue: btrfs-endio-write btrfs_work_helper [btrfs]
+[75721.616209] RIP: 0010:btrfs_use_block_rsv+0x560/0x760 [btrfs]
+[75721.646649] RSP: 0018:ffffc9000fbdf3e0 EFLAGS: 00010286
+[75721.654126] RAX: 0000000000000000 RBX: 0000000000004000 RCX: 0000000000000000
+[75721.663524] RDX: 0000000000000004 RSI: 0000000000000008 RDI: fffff52001f7be6e
+[75721.672921] RBP: ffffc9000fbdf420 R08: 0000000000000001 R09: ffff889f8d1fc6c7
+[75721.682493] R10: ffffed13f1a3f8d8 R11: 0000000000000001 R12: ffff88980a3c0e28
+[75721.692284] R13: ffff889b66590000 R14: ffff88980a3c0e40 R15: ffff88980a3c0e8a
+[75721.701878] FS:  0000000000000000(0000) GS:ffff889f8d000000(0000) knlGS:0000000000000000
+[75721.712601] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[75721.720726] CR2: 000055d12e05c018 CR3: 0000800193594000 CR4: 0000000000350ee0
+[75721.730499] Call Trace:
+[75721.735166]  <TASK>
+[75721.739886]  btrfs_alloc_tree_block+0x1e1/0x1100 [btrfs]
+[75721.747545]  ? btrfs_alloc_logged_file_extent+0x550/0x550 [btrfs]
+[75721.756145]  ? btrfs_get_32+0xea/0x2d0 [btrfs]
+[75721.762852]  ? btrfs_get_32+0xea/0x2d0 [btrfs]
+[75721.769520]  ? push_leaf_left+0x420/0x620 [btrfs]
+[75721.776431]  ? memcpy+0x4e/0x60
+[75721.781931]  split_leaf+0x433/0x12d0 [btrfs]
+[75721.788392]  ? btrfs_get_token_32+0x580/0x580 [btrfs]
+[75721.795636]  ? push_for_double_split.isra.0+0x420/0x420 [btrfs]
+[75721.803759]  ? leaf_space_used+0x15d/0x1a0 [btrfs]
+[75721.811156]  btrfs_search_slot+0x1bc3/0x2790 [btrfs]
+[75721.818300]  ? lock_downgrade+0x7c0/0x7c0
+[75721.824411]  ? free_extent_buffer.part.0+0x107/0x200 [btrfs]
+[75721.832456]  ? split_leaf+0x12d0/0x12d0 [btrfs]
+[75721.839149]  ? free_extent_buffer.part.0+0x14f/0x200 [btrfs]
+[75721.846945]  ? free_extent_buffer+0x13/0x20 [btrfs]
+[75721.853960]  ? btrfs_release_path+0x4b/0x190 [btrfs]
+[75721.861429]  btrfs_csum_file_blocks+0x85c/0x1500 [btrfs]
+[75721.869313]  ? rcu_read_lock_sched_held+0x16/0x80
+[75721.876085]  ? lock_release+0x552/0xf80
+[75721.881957]  ? btrfs_del_csums+0x8c0/0x8c0 [btrfs]
+[75721.888886]  ? __kasan_check_write+0x14/0x20
+[75721.895152]  ? do_raw_read_unlock+0x44/0x80
+[75721.901323]  ? _raw_write_lock_irq+0x60/0x80
+[75721.907983]  ? btrfs_global_root+0xb9/0xe0 [btrfs]
+[75721.915166]  ? btrfs_csum_root+0x12b/0x180 [btrfs]
+[75721.921918]  ? btrfs_get_global_root+0x820/0x820 [btrfs]
+[75721.929166]  ? _raw_write_unlock+0x23/0x40
+[75721.935116]  ? unpin_extent_cache+0x1e3/0x390 [btrfs]
+[75721.942041]  btrfs_finish_ordered_io.isra.0+0xa0c/0x1dc0 [btrfs]
+[75721.949906]  ? try_to_wake_up+0x30/0x14a0
+[75721.955700]  ? btrfs_unlink_subvol+0xda0/0xda0 [btrfs]
+[75721.962661]  ? rcu_read_lock_sched_held+0x16/0x80
+[75721.969111]  ? lock_acquire+0x41b/0x4c0
+[75721.974982]  finish_ordered_fn+0x15/0x20 [btrfs]
+[75721.981639]  btrfs_work_helper+0x1af/0xa80 [btrfs]
+[75721.988184]  ? _raw_spin_unlock_irq+0x28/0x50
+[75721.994643]  process_one_work+0x815/0x1460
+[75722.000444]  ? pwq_dec_nr_in_flight+0x250/0x250
+[75722.006643]  ? do_raw_spin_trylock+0xbb/0x190
+[75722.013086]  worker_thread+0x59a/0xeb0
+[75722.018511]  kthread+0x2ac/0x360
+[75722.023428]  ? process_one_work+0x1460/0x1460
+[75722.029431]  ? kthread_complete_and_exit+0x30/0x30
+[75722.036044]  ret_from_fork+0x22/0x30
+[75722.041255]  </TASK>
+[75722.045047] irq event stamp: 0
+[75722.049703] hardirqs last  enabled at (0): [<0000000000000000>] 0x0
+[75722.057610] hardirqs last disabled at (0): [<ffffffff8118a94a>] copy_process+0x1c1a/0x66b0
+[75722.067533] softirqs last  enabled at (0): [<ffffffff8118a989>] copy_process+0x1c59/0x66b0
+[75722.077423] softirqs last disabled at (0): [<0000000000000000>] 0x0
+[75722.085335] ---[ end trace 0000000000000000 ]---
+
+To fix the estimation, introduce fs_info->max_extent_size to replace
+BTRFS_MAX_EXTENT_SIZE, which allows setting a different size for regular
+vs zoned filesystems.
+
+Set fs_info->max_extent_size to BTRFS_MAX_EXTENT_SIZE by default. On a
+zoned filesystem, it is set to fs_info->max_zone_append_size.
+
+CC: stable@vger.kernel.org # 5.12+
+Fixes: d8e3fb106f39 ("btrfs: zoned: use ZONE_APPEND write for zoned mode")
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ctree.h     |    6 ++++++
+ fs/btrfs/disk-io.c   |    2 ++
+ fs/btrfs/extent_io.c |    4 +++-
+ fs/btrfs/inode.c     |    6 ++++--
+ fs/btrfs/zoned.c     |    5 ++++-
+ 5 files changed, 19 insertions(+), 4 deletions(-)
+
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -999,6 +999,12 @@ struct btrfs_fs_info {
+       u32 csums_per_leaf;
+       u32 stripesize;
++      /*
++       * Maximum size of an extent. BTRFS_MAX_EXTENT_SIZE on regular
++       * filesystem, on zoned it depends on the device constraints.
++       */
++      u64 max_extent_size;
++
+       /* Block groups and devices containing active swapfiles. */
+       spinlock_t swapfile_pins_lock;
+       struct rb_root swapfile_pins;
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -3006,6 +3006,8 @@ void btrfs_init_fs_info(struct btrfs_fs_
+       fs_info->sectorsize_bits = ilog2(4096);
+       fs_info->stripesize = 4096;
++      fs_info->max_extent_size = BTRFS_MAX_EXTENT_SIZE;
++
+       spin_lock_init(&fs_info->swapfile_pins_lock);
+       fs_info->swapfile_pins = RB_ROOT;
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -1985,8 +1985,10 @@ noinline_for_stack bool find_lock_delall
+                                   struct page *locked_page, u64 *start,
+                                   u64 *end)
+ {
++      struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
+-      u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
++      /* The sanity tests may not set a valid fs_info. */
++      u64 max_bytes = fs_info ? fs_info->max_extent_size : BTRFS_MAX_EXTENT_SIZE;
+       u64 delalloc_start;
+       u64 delalloc_end;
+       bool found;
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -2032,6 +2032,7 @@ int btrfs_run_delalloc_range(struct btrf
+ void btrfs_split_delalloc_extent(struct inode *inode,
+                                struct extent_state *orig, u64 split)
+ {
++      struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       u64 size;
+       /* not delalloc, ignore it */
+@@ -2039,7 +2040,7 @@ void btrfs_split_delalloc_extent(struct
+               return;
+       size = orig->end - orig->start + 1;
+-      if (size > BTRFS_MAX_EXTENT_SIZE) {
++      if (size > fs_info->max_extent_size) {
+               u32 num_extents;
+               u64 new_size;
+@@ -2068,6 +2069,7 @@ void btrfs_split_delalloc_extent(struct
+ void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
+                                struct extent_state *other)
+ {
++      struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       u64 new_size, old_size;
+       u32 num_extents;
+@@ -2081,7 +2083,7 @@ void btrfs_merge_delalloc_extent(struct
+               new_size = other->end - new->start + 1;
+       /* we're not bigger than the max, unreserve the space and go */
+-      if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
++      if (new_size <= fs_info->max_extent_size) {
+               spin_lock(&BTRFS_I(inode)->lock);
+               btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
+               spin_unlock(&BTRFS_I(inode)->lock);
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -670,8 +670,11 @@ int btrfs_check_zoned_mode(struct btrfs_
+       }
+       fs_info->zone_size = zone_size;
+-      fs_info->max_zone_append_size = max_zone_append_size;
++      fs_info->max_zone_append_size = ALIGN_DOWN(max_zone_append_size,
++                                                 fs_info->sectorsize);
+       fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED;
++      if (fs_info->max_zone_append_size < fs_info->max_extent_size)
++              fs_info->max_extent_size = fs_info->max_zone_append_size;
+       /*
+        * Check mount options here, because we might change fs_info->zoned
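
Taken together, the disk-io.c and zoned.c hunks above derive the
per-filesystem limit as: start from BTRFS_MAX_EXTENT_SIZE (128 MiB) and, on a
zoned mount, clamp it to the sector-aligned max_zone_append_size. A condensed
standalone sketch of that derivation, using an assumed device limit:

/*
 * Condensed sketch of how fs_info->max_extent_size ends up being set; the
 * zone-append limit is an illustrative value.
 */
#include <stdio.h>

#define BTRFS_MAX_EXTENT_SIZE (128ULL << 20)  /* 128 MiB */
#define ALIGN_DOWN(x, a) ((x) - ((x) % (a)))  /* simplified round-down */

int main(void)
{
    int zoned = 1;                                         /* pretend zoned mount */
    unsigned long long sectorsize = 4096;
    unsigned long long max_zone_append_size = 4ULL << 20;  /* hypothetical device limit */
    unsigned long long max_extent_size = BTRFS_MAX_EXTENT_SIZE;  /* default */

    if (zoned) {
        max_zone_append_size = ALIGN_DOWN(max_zone_append_size, sectorsize);
        if (max_zone_append_size < max_extent_size)
            max_extent_size = max_zone_append_size;
    }

    printf("max_extent_size = %llu bytes\n", max_extent_size); /* 4 MiB here */
    return 0;
}
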
diff --git a/queue-5.15/btrfs-zoned-revive-max_zone_append_bytes.patch b/queue-5.15/btrfs-zoned-revive-max_zone_append_bytes.patch
new file mode 100644 (file)
index 0000000..ad77cb8
--- /dev/null
@@ -0,0 +1,102 @@
+From foo@baz Thu Aug 25 02:07:49 PM CEST 2022
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Mon, 22 Aug 2022 15:07:02 +0900
+Subject: btrfs: zoned: revive max_zone_append_bytes
+To: linux-btrfs@vger.kernel.org, stable@vger.kernel.org
+Cc: Naohiro Aota <naohiro.aota@wdc.com>, Johannes Thumshirn <johannes.thumshirn@wdc.com>, David Sterba <dsterba@suse.com>
+Message-ID: <20220822060704.1278361-4-naohiro.aota@wdc.com>
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit c2ae7b772ef4e86c5ddf3fd47bf59045ae96a414 upstream
+
+This patch is basically a revert of commit 5a80d1c6a270 ("btrfs: zoned:
+remove max_zone_append_size logic"), but without the unnecessary ASSERT and
+check. The max_zone_append_size will be used as a hint to estimate the number
+of extents needed to cover a delalloc/writeback region in later commits.
+
+The size of a ZONE APPEND bio is also limited by queue_max_segments(), so
+this commit takes that into account when calculating max_zone_append_size.
+Technically, a bio can be larger than queue_max_segments() * PAGE_SIZE if
+the pages are contiguous. But it is safe to consider
+"queue_max_segments() * PAGE_SIZE" as an upper limit of an extent size when
+calculating the number of extents needed to write data.
+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ctree.h |    2 ++
+ fs/btrfs/zoned.c |   17 +++++++++++++++++
+ fs/btrfs/zoned.h |    1 +
+ 3 files changed, 20 insertions(+)
+
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -1017,6 +1017,8 @@ struct btrfs_fs_info {
+               u64 zoned;
+       };
++      /* Max size to emit ZONE_APPEND write command */
++      u64 max_zone_append_size;
+       struct mutex zoned_meta_io_lock;
+       spinlock_t treelog_bg_lock;
+       u64 treelog_bg;
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -386,6 +386,16 @@ int btrfs_get_dev_zone_info(struct btrfs
+       nr_sectors = bdev_nr_sectors(bdev);
+       zone_info->zone_size_shift = ilog2(zone_info->zone_size);
+       zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors);
++      /*
++       * We limit max_zone_append_size also by max_segments *
++       * PAGE_SIZE. Technically, we can have multiple pages per segment. But,
++       * since btrfs adds the pages one by one to a bio, and btrfs cannot
++       * increase the metadata reservation even if it increases the number of
++       * extents, it is safe to stick with the limit.
++       */
++      zone_info->max_zone_append_size =
++              min_t(u64, (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT,
++                    (u64)bdev_max_segments(bdev) << PAGE_SHIFT);
+       if (!IS_ALIGNED(nr_sectors, zone_sectors))
+               zone_info->nr_zones++;
+@@ -570,6 +580,7 @@ int btrfs_check_zoned_mode(struct btrfs_
+       u64 zoned_devices = 0;
+       u64 nr_devices = 0;
+       u64 zone_size = 0;
++      u64 max_zone_append_size = 0;
+       const bool incompat_zoned = btrfs_fs_incompat(fs_info, ZONED);
+       int ret = 0;
+@@ -605,6 +616,11 @@ int btrfs_check_zoned_mode(struct btrfs_
+                               ret = -EINVAL;
+                               goto out;
+                       }
++                      if (!max_zone_append_size ||
++                          (zone_info->max_zone_append_size &&
++                           zone_info->max_zone_append_size < max_zone_append_size))
++                              max_zone_append_size =
++                                      zone_info->max_zone_append_size;
+               }
+               nr_devices++;
+       }
+@@ -654,6 +670,7 @@ int btrfs_check_zoned_mode(struct btrfs_
+       }
+       fs_info->zone_size = zone_size;
++      fs_info->max_zone_append_size = max_zone_append_size;
+       fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED;
+       /*
+--- a/fs/btrfs/zoned.h
++++ b/fs/btrfs/zoned.h
+@@ -23,6 +23,7 @@ struct btrfs_zoned_device_info {
+        */
+       u64 zone_size;
+       u8  zone_size_shift;
++      u64 max_zone_append_size;
+       u32 nr_zones;
+       unsigned long *seq_zones;
+       unsigned long *empty_zones;
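
The zone_info->max_zone_append_size computed in the patch above is the
smaller of the device's zone-append limit (converted from sectors to bytes)
and max_segments * PAGE_SIZE, because btrfs adds one page per bio segment. A
userspace sketch with assumed example limits:

/*
 * Sketch of the per-device max_zone_append_size calculation; both device
 * limits below are made-up examples.
 */
#include <stdio.h>

#define SECTOR_SHIFT 9
#define PAGE_SHIFT   12  /* assuming 4 KiB pages */

static unsigned long long min_u64(unsigned long long a, unsigned long long b)
{
    return a < b ? a : b;
}

int main(void)
{
    unsigned long long append_sectors = 2048; /* as if from bdev_max_zone_append_sectors() */
    unsigned long long max_segments = 128;    /* as if from bdev_max_segments() */

    unsigned long long limit = min_u64(append_sectors << SECTOR_SHIFT,
                                       max_segments << PAGE_SHIFT);

    /* min(1 MiB, 512 KiB) = 524288 bytes */
    printf("max_zone_append_size = %llu bytes\n", limit);
    return 0;
}
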
diff --git a/queue-5.15/series b/queue-5.15/series
index 12e2927d659b5ed0a5ca56dd659a46c91530c307..469c0f6cc1e15b3b78cfa60974795f3a5a25c1d1 100644 (file)
@@ -6,3 +6,8 @@ parisc-make-config_64bit-available-for-arch-parisc64-only.patch
 parisc-fix-exception-handler-for-fldw-and-fstw-instructions.patch
 kernel-sys_ni-add-compat-entry-for-fadvise64_64.patch
 x86-entry-move-cld-to-the-start-of-the-idtentry-macro.patch
+block-add-a-bdev_max_zone_append_sectors-helper.patch
+block-add-bdev_max_segments-helper.patch
+btrfs-zoned-revive-max_zone_append_bytes.patch
+btrfs-replace-btrfs_max_extent_size-with-fs_info-max_extent_size.patch
+btrfs-convert-count_max_extents-to-use-fs_info-max_extent_size.patch