git.ipfire.org Git - thirdparty/linux.git/commitdiff
iomap: add IOMAP_DIO_FSBLOCK_ALIGNED flag
author Qu Wenruo <wqu@suse.com>
Fri, 31 Oct 2025 13:10:26 +0000 (14:10 +0100)
committer Christian Brauner <brauner@kernel.org>
Wed, 5 Nov 2025 12:09:27 +0000 (13:09 +0100)
Btrfs requires all of its bios to be fs block aligned. Normally this is
totally fine, but with the incoming support for block size larger than
page size (bs > ps), the requirement is no longer met for direct IOs.

This is because iomap_dio_bio_iter() calls bio_iov_iter_get_pages() with
an alignment requirement of only bdev_logical_block_size().

In the real world that value is either 512 or 4K. On systems with 4K
pages this means bio_iov_iter_get_pages() can break the bio at any page
boundary, breaking btrfs' requirement for bs > ps cases.

To address this problem, introduce a new public iomap dio flag,
IOMAP_DIO_FSBLOCK_ALIGNED.

When __iomap_dio_rw() is called with the new flag, iomap_dio::flags
inherits it, and iomap_dio_bio_iter() uses the fs block size as the
alignment, both for checking pos/length and for the alignment passed to
bio_iov_iter_get_pages(), thus respecting the fs block size requirement.

The initial user of this flag will be btrfs, which needs to calculate the
checksum for direct read and thus requires the biovec to be fs block
aligned for the incoming bs > ps support.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Pankaj Raghav <p.raghav@samsung.com>
[hch: also align pos/len, incorporate the trace flags from Darrick]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://patch.msgid.link/20251031131045.1613229-2-hch@lst.de
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
fs/iomap/direct-io.c
fs/iomap/trace.h
include/linux/iomap.h

index e9e5f07031609e4fe9f8211d22fd38a12347655d..8b2f9fb89eb3c95605f7ae85c3f1c43af49b1be8 100644 (file)
@@ -336,8 +336,18 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
        int nr_pages, ret = 0;
        u64 copied = 0;
        size_t orig_count;
+       unsigned int alignment;
 
-       if ((pos | length) & (bdev_logical_block_size(iomap->bdev) - 1))
+       /*
+        * File systems that write out of place and always allocate new blocks
+        * need each bio to be block aligned as that's the unit of allocation.
+        */
+       if (dio->flags & IOMAP_DIO_FSBLOCK_ALIGNED)
+               alignment = fs_block_size;
+       else
+               alignment = bdev_logical_block_size(iomap->bdev);
+
+       if ((pos | length) & (alignment - 1))
                return -EINVAL;
 
        if (dio->flags & IOMAP_DIO_WRITE) {
@@ -434,7 +444,7 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
                bio->bi_end_io = iomap_dio_bio_end_io;
 
                ret = bio_iov_iter_get_pages(bio, dio->submit.iter,
-                               bdev_logical_block_size(iomap->bdev) - 1);
+                                            alignment - 1);
                if (unlikely(ret)) {
                        /*
                         * We have to stop part way through an IO. We must fall
@@ -639,6 +649,9 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
        if (iocb->ki_flags & IOCB_NOWAIT)
                iomi.flags |= IOMAP_NOWAIT;
 
+       if (dio_flags & IOMAP_DIO_FSBLOCK_ALIGNED)
+               dio->flags |= IOMAP_DIO_FSBLOCK_ALIGNED;
+
        if (iov_iter_rw(iter) == READ) {
                /* reads can always complete inline */
                dio->flags |= IOMAP_DIO_INLINE_COMP;
index a61c1dae47427006f751849247dbb095fb48bf78..532787277b168e881759d521cd3559ec65979152 100644 (file)
@@ -122,9 +122,10 @@ DEFINE_RANGE_EVENT(iomap_zero_iter);
 
 
 #define IOMAP_DIO_STRINGS \
-       {IOMAP_DIO_FORCE_WAIT,  "DIO_FORCE_WAIT" }, \
-       {IOMAP_DIO_OVERWRITE_ONLY, "DIO_OVERWRITE_ONLY" }, \
-       {IOMAP_DIO_PARTIAL,     "DIO_PARTIAL" }
+       {IOMAP_DIO_FORCE_WAIT,          "DIO_FORCE_WAIT" }, \
+       {IOMAP_DIO_OVERWRITE_ONLY,      "DIO_OVERWRITE_ONLY" }, \
+       {IOMAP_DIO_PARTIAL,             "DIO_PARTIAL" }, \
+       {IOMAP_DIO_FSBLOCK_ALIGNED,     "DIO_FSBLOCK_ALIGNED" }
 
 DECLARE_EVENT_CLASS(iomap_class,
        TP_PROTO(struct inode *inode, struct iomap *iomap),
index 65d1231148830a972c3a3fdef32b58fcaa2236aa..8b1ac08c74741bdf094ae0375f860c18f3705c5f 100644 (file)
@@ -553,6 +553,14 @@ struct iomap_dio_ops {
  */
 #define IOMAP_DIO_PARTIAL              (1 << 2)
 
+/*
+ * Ensure each bio is aligned to fs block size.
+ *
+ * For filesystems which need to calculate/verify the checksum of each fs
+ * block. Otherwise they may not be able to handle unaligned bios.
+ */
+#define IOMAP_DIO_FSBLOCK_ALIGNED      (1 << 3)
+
 ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
                const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
                unsigned int dio_flags, void *private, size_t done_before);