From: Christoph Hellwig Date: Wed, 5 Nov 2025 19:52:15 +0000 (-0500) Subject: block: fix cached zone reporting after zone append was used X-Git-Tag: v6.19-rc1~168^2~79 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=15638d52cbcf6e969f4a5e2757b118355db583f3;p=thirdparty%2Fkernel%2Flinux.git block: fix cached zone reporting after zone append was used No zone plugs are allocated when a zone is opened by calling Zone Append on it. This makes cached zone reporting incorrectly report empty zones if the file system is unmounted and report zones is called after that, e.g. by xfstests test cases using the scratch device. Fix this by recording if zone append was used on a device, and disable cached reporting for the device until a ZONE_RESET_ALL happens that guarantees all zones are empty. We could probably do even better using a per-zone flag, but the practical use cases for zone reporting after the initial mount are rather limited, so let's keep things simple for now. Fixes: 31f0656a4ab7 ("block: introduce blkdev_report_zones_cached()") Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- diff --git a/block/blk-zoned.c b/block/blk-zoned.c index a0ce17e2143f4..c5226bcaaa940 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -899,6 +899,19 @@ static int blkdev_report_zone_fallback(struct block_device *bdev, return blkdev_do_report_zones(bdev, sector, 1, &args); } +/* + * For devices that natively support zone append operations, we do not use zone + * write plugging for zone append writes, which makes the zone condition + * tracking invalid once zone append was used. In that case fall back to a + * regular report zones to get correct information. 
+ */ +static inline bool blkdev_has_cached_report_zones(struct block_device *bdev) +{ + return disk_need_zone_resources(bdev->bd_disk) && + (bdev_emulates_zone_append(bdev) || + !test_bit(GD_ZONE_APPEND_USED, &bdev->bd_disk->state)); +} + /** * blkdev_get_zone_info - Get a single zone information from cached data * @bdev: Target block device @@ -932,6 +945,9 @@ int blkdev_get_zone_info(struct block_device *bdev, sector_t sector, memset(zone, 0, sizeof(*zone)); sector = ALIGN_DOWN(sector, zone_sectors); + if (!blkdev_has_cached_report_zones(bdev)) + return blkdev_report_zone_fallback(bdev, sector, zone); + rcu_read_lock(); zones_cond = rcu_dereference(disk->zones_cond); if (!disk->zone_wplugs_hash || !zones_cond) { @@ -1035,11 +1051,7 @@ int blkdev_report_zones_cached(struct block_device *bdev, sector_t sector, if (!nr_zones || sector >= capacity) return 0; - /* - * If we do not have any zone write plug resources, fallback to using - * the regular zone report. - */ - if (!disk_need_zone_resources(disk)) { + if (!blkdev_has_cached_report_zones(bdev)) { struct blk_report_zones_args args = { .cb = cb, .data = data, @@ -1115,6 +1127,7 @@ static void blk_zone_reset_all_bio_endio(struct bio *bio) for (sector = 0; sector < capacity; sector += bdev_zone_sectors(bio->bi_bdev)) disk_zone_set_cond(disk, sector, BLK_ZONE_COND_EMPTY); + clear_bit(GD_ZONE_APPEND_USED, &disk->state); } static void blk_zone_finish_bio_endio(struct bio *bio) @@ -1474,6 +1487,9 @@ static void blk_zone_wplug_handle_native_zone_append(struct bio *bio) struct blk_zone_wplug *zwplug; unsigned long flags; + if (!test_bit(GD_ZONE_APPEND_USED, &disk->state)) + set_bit(GD_ZONE_APPEND_USED, &disk->state); + /* * We have native support for zone append operations, so we are not * going to handle @bio through plugging. 
However, we may already have a diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f0ab02e0a673c..6a498aa7f7e76 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -173,6 +173,7 @@ struct gendisk { #define GD_ADDED 4 #define GD_SUPPRESS_PART_SCAN 5 #define GD_OWNS_QUEUE 6 +#define GD_ZONE_APPEND_USED 7 struct mutex open_mutex; /* open/close mutex */ unsigned open_partitions; /* number of open partitions */