xfs: set s_min_writeback_pages for zoned file systems

author Christoph Hellwig <hch@lst.de>

Fri, 17 Oct 2025 03:45:49 +0000 (05:45 +0200)

committer Christian Brauner <brauner@kernel.org>

Wed, 29 Oct 2025 14:54:31 +0000 (15:54 +0100)
author Christoph Hellwig <hch@lst.de>
Fri, 17 Oct 2025 03:45:49 +0000 (05:45 +0200)
committer Christian Brauner <brauner@kernel.org>
Wed, 29 Oct 2025 14:54:31 +0000 (15:54 +0100)
diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c

index 1147bacb2da8e6b85910fd35d11c2ca5b06abf3a..c342595acc3e5859dd7a21c2db3119a375aa7629 100644 (file)
--- a/fs/xfs/xfs_zone_alloc.c
+++ b/fs/xfs/xfs_zone_alloc.c
@@ -1215,6 +1215,7 @@ xfs_mount_zones(
                 .mp             = mp,
         };
         struct xfs_buftarg      *bt = mp->m_rtdev_targp;
+       xfs_extlen_t            zone_blocks = mp->m_groups[XG_TYPE_RTG].blocks;
         int                     error;
  
         if (!bt) {
@@ -1245,10 +1246,33 @@ xfs_mount_zones(
                 return -ENOMEM;
  
         xfs_info(mp, "%u zones of %u blocks (%u max open zones)",
-                mp->m_sb.sb_rgcount, mp->m_groups[XG_TYPE_RTG].blocks,
-                mp->m_max_open_zones);
+                mp->m_sb.sb_rgcount, zone_blocks, mp->m_max_open_zones);
         trace_xfs_zones_mount(mp);
  
+       /*
+        * The writeback code switches between inodes regularly to provide
+        * fairness.  The default lower bound is 4MiB, but for zoned file
+        * systems we want to increase that both to reduce seeks, but also more
+        * importantly so that workloads that write files in a multiple of the
+        * zone size do not get fragmented and require garbage collection when
+        * they shouldn't.  Increase it to the zone size, capped by the max
+        * extent len.
+        *
+        * Note that because s_min_writeback_pages is a superblock field, this
+        * value also gets applied to non-zoned files on the data device if
+        * there are any.  On a typical zoned setup all data is on the RT device
+        * because using the more efficient sequential-write-required zones
+        * is the reason for using the zone allocator, and either the RT device
+        * and the (meta)data device are on the same block device, or the
+        * (meta)data device is on a fast SSD while the data on the RT device
+        * is on a SMR HDD.  In any combination of the above cases enforcing
+        * the higher min_writeback_pages for non-RT inodes is either a noop
+        * or beneficial.
+        */
+       mp->m_super->s_min_writeback_pages =
+               XFS_FSB_TO_B(mp, min(zone_blocks, XFS_MAX_BMBT_EXTLEN)) >>
+                       PAGE_SHIFT;
+
         if (bdev_is_zoned(bt->bt_bdev)) {
                 error = blkdev_report_zones(bt->bt_bdev,
                                 XFS_FSB_TO_BB(mp, mp->m_sb.sb_rtstart),
author	Christoph Hellwig <hch@lst.de>
	Fri, 17 Oct 2025 03:45:49 +0000 (05:45 +0200)
committer	Christian Brauner <brauner@kernel.org>
	Wed, 29 Oct 2025 14:54:31 +0000 (15:54 +0100)