git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
block: introduce zone_write_granularity limit
author Damien Le Moal <damien.lemoal@wdc.com>
Thu, 28 Jan 2021 04:47:30 +0000 (13:47 +0900)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 13 Apr 2024 10:58:07 +0000 (12:58 +0200)
[ Upstream commit a805a4fa4fa376bbc145762bb8b09caa2fa8af48 ]

Per ZBC and ZAC specifications, host-managed SMR hard-disks mandate that
all writes into sequential write required zones be aligned to the device
physical block size. However, NVMe ZNS does not have this constraint and
allows write operations into sequential zones to be aligned to the
device logical block size. This inconsistency does not help with
software portability across device types.

To solve this, introduce the zone_write_granularity queue limit to
indicate the alignment constraint, in bytes, of write operations into
zones of a zoned block device. This new limit is exported as a
read-only sysfs queue attribute and the helper
blk_queue_zone_write_granularity() introduced for drivers to set this
limit.

The function blk_queue_set_zoned() is modified to set this new limit to
the device logical block size by default. NVMe ZNS devices as well as
zoned nullb devices use this default value as is. The scsi disk driver
is modified to execute the blk_queue_zone_write_granularity() helper to
set the zone write granularity of host-managed SMR disks to the disk
physical block size.

The accessor functions queue_zone_write_granularity() and
bdev_zone_write_granularity() are also introduced.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Stable-dep-of: c8f6f88d2592 ("block: Clear zone limits for a non-zoned stacked queue")
Signed-off-by: Sasha Levin <sashal@kernel.org>
Documentation/block/queue-sysfs.rst
block/blk-settings.c
block/blk-sysfs.c
drivers/scsi/sd_zbc.c
include/linux/blkdev.h

index 2638d3446b79457d9772145e0d70a358349f9087..c8bf8bc3c03afb78dcb4f28b5eaf72f960c7902a 100644 (file)
@@ -273,4 +273,11 @@ devices are described in the ZBC (Zoned Block Commands) and ZAC
 do not support zone commands, they will be treated as regular block devices
 and zoned will report "none".
 
+zone_write_granularity (RO)
+---------------------------
+This indicates the alignment constraint, in bytes, for write operations in
+sequential zones of zoned block devices (devices with a zoned attribute
+that reports "host-managed" or "host-aware"). This value is always 0 for
+regular block devices.
+
 Jens Axboe <jens.axboe@oracle.com>, February 2009
index c3aa7f8ee388357c7b96b50cd58c6591910bba84..ab39169aa2b289ccf2d0dc78865731d342fc0ce4 100644 (file)
@@ -60,6 +60,7 @@ void blk_set_default_limits(struct queue_limits *lim)
        lim->io_opt = 0;
        lim->misaligned = 0;
        lim->zoned = BLK_ZONED_NONE;
+       lim->zone_write_granularity = 0;
 }
 EXPORT_SYMBOL(blk_set_default_limits);
 
@@ -353,6 +354,28 @@ void blk_queue_physical_block_size(struct request_queue *q, unsigned int size)
 }
 EXPORT_SYMBOL(blk_queue_physical_block_size);
 
+/**
+ * blk_queue_zone_write_granularity - set zone write granularity for the queue
+ * @q:  the request queue for the zoned device
+ * @size:  the zone write granularity size, in bytes
+ *
+ * Description:
+ *   This should be set to the lowest possible size allowing to write in
+ *   sequential zones of a zoned block device.
+ */
+void blk_queue_zone_write_granularity(struct request_queue *q,
+                                     unsigned int size)
+{
+       if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
+               return;
+
+       q->limits.zone_write_granularity = size;
+
+       if (q->limits.zone_write_granularity < q->limits.logical_block_size)
+               q->limits.zone_write_granularity = q->limits.logical_block_size;
+}
+EXPORT_SYMBOL_GPL(blk_queue_zone_write_granularity);
+
 /**
  * blk_queue_alignment_offset - set physical block alignment offset
  * @q: the request queue for the device
@@ -630,6 +653,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
                        t->discard_granularity;
        }
 
+       t->zone_write_granularity = max(t->zone_write_granularity,
+                                       b->zone_write_granularity);
        t->zoned = max(t->zoned, b->zoned);
        return ret;
 }
@@ -846,6 +871,8 @@ EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging);
  */
 void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
 {
+       struct request_queue *q = disk->queue;
+
        switch (model) {
        case BLK_ZONED_HM:
                /*
@@ -874,7 +901,15 @@ void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
                break;
        }
 
-       disk->queue->limits.zoned = model;
+       q->limits.zoned = model;
+       if (model != BLK_ZONED_NONE) {
+               /*
+                * Set the zone write granularity to the device logical block
+                * size by default. The driver can change this value if needed.
+                */
+               blk_queue_zone_write_granularity(q,
+                                               queue_logical_block_size(q));
+       }
 }
 EXPORT_SYMBOL_GPL(blk_queue_set_zoned);
 
index 9174137a913c4f1a86a2c49fe5009a35d5ca7379..ddf23bf3e0f1dbf502cc2ef4eeaa22016f031179 100644 (file)
@@ -219,6 +219,12 @@ static ssize_t queue_write_zeroes_max_show(struct request_queue *q, char *page)
                (unsigned long long)q->limits.max_write_zeroes_sectors << 9);
 }
 
+static ssize_t queue_zone_write_granularity_show(struct request_queue *q,
+                                                char *page)
+{
+       return queue_var_show(queue_zone_write_granularity(q), page);
+}
+
 static ssize_t queue_zone_append_max_show(struct request_queue *q, char *page)
 {
        unsigned long long max_sectors = q->limits.max_zone_append_sectors;
@@ -585,6 +591,7 @@ QUEUE_RO_ENTRY(queue_discard_zeroes_data, "discard_zeroes_data");
 QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes");
 QUEUE_RO_ENTRY(queue_write_zeroes_max, "write_zeroes_max_bytes");
 QUEUE_RO_ENTRY(queue_zone_append_max, "zone_append_max_bytes");
+QUEUE_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity");
 
 QUEUE_RO_ENTRY(queue_zoned, "zoned");
 QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
@@ -639,6 +646,7 @@ static struct attribute *queue_attrs[] = {
        &queue_write_same_max_entry.attr,
        &queue_write_zeroes_max_entry.attr,
        &queue_zone_append_max_entry.attr,
+       &queue_zone_write_granularity_entry.attr,
        &queue_nonrot_entry.attr,
        &queue_zoned_entry.attr,
        &queue_nr_zones_entry.attr,
index 01088f333dbc4eb03618b6f0dc9d0e5adfef056f..f9cd41703d99b99f2a6fed6a4d79bde867a1c08a 100644 (file)
@@ -793,6 +793,14 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
        blk_queue_max_active_zones(q, 0);
        nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
 
+       /*
+        * Per ZBC and ZAC specifications, writes in sequential write required
+        * zones of host-managed devices must be aligned to the device physical
+        * block size.
+        */
+       if (blk_queue_zoned_model(q) == BLK_ZONED_HM)
+               blk_queue_zone_write_granularity(q, sdkp->physical_block_size);
+
        /* READ16/WRITE16 is mandatory for ZBC disks */
        sdkp->device->use_16_for_rw = 1;
        sdkp->device->use_10_for_rw = 0;
index 583824f1110790057ad313de57500615ce2ed664..a47e1aebaff24287f82e75c1799512c7c29a56e0 100644 (file)
@@ -345,6 +345,7 @@ struct queue_limits {
        unsigned int            max_zone_append_sectors;
        unsigned int            discard_granularity;
        unsigned int            discard_alignment;
+       unsigned int            zone_write_granularity;
 
        unsigned short          max_segments;
        unsigned short          max_integrity_segments;
@@ -1169,6 +1170,8 @@ extern void blk_queue_logical_block_size(struct request_queue *, unsigned int);
 extern void blk_queue_max_zone_append_sectors(struct request_queue *q,
                unsigned int max_zone_append_sectors);
 extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
+void blk_queue_zone_write_granularity(struct request_queue *q,
+                                     unsigned int size);
 extern void blk_queue_alignment_offset(struct request_queue *q,
                                       unsigned int alignment);
 void blk_queue_update_readahead(struct request_queue *q);
@@ -1480,6 +1483,18 @@ static inline int bdev_io_opt(struct block_device *bdev)
        return queue_io_opt(bdev_get_queue(bdev));
 }
 
+static inline unsigned int
+queue_zone_write_granularity(const struct request_queue *q)
+{
+       return q->limits.zone_write_granularity;
+}
+
+static inline unsigned int
+bdev_zone_write_granularity(struct block_device *bdev)
+{
+       return queue_zone_write_granularity(bdev_get_queue(bdev));
+}
+
 static inline int queue_alignment_offset(const struct request_queue *q)
 {
        if (q->limits.misaligned)