]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
md/md-bitmap: add a new method blocks_synced() in bitmap_operations
authorYu Kuai <yukuai3@huawei.com>
Fri, 29 Aug 2025 08:04:23 +0000 (16:04 +0800)
committerYu Kuai <yukuai3@huawei.com>
Sat, 6 Sep 2025 09:20:01 +0000 (17:20 +0800)
Currently, raid456 must perform a whole array initial recovery to build
initial xor data, then IO to the array won't have to read all the blocks
in the underlying disks.

This behavior will affect IO performance a lot, and nowadays there are
huge disks and the initial recovery can take a long time. Hence llbitmap
will support lazy initial recovery in the following patches. This method is
used to check whether data blocks are synced or not; if not, IO will still
have to read all blocks for raid456.

Link: https://lore.kernel.org/linux-raid/20250829080426.1441678-9-yukuai1@huaweicloud.com
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
drivers/md/md-bitmap.h
drivers/md/raid5.c

index 95453696c68e4cba42c285f860b1ad63f9000712..5f41724cbcd8723ebf932de71bd1d8587542fc56 100644 (file)
@@ -90,6 +90,7 @@ struct bitmap_operations {
        md_bitmap_fn *end_discard;
 
        sector_t (*skip_sync_blocks)(struct mddev *mddev, sector_t offset);
+       bool (*blocks_synced)(struct mddev *mddev, sector_t offset);
        bool (*start_sync)(struct mddev *mddev, sector_t offset,
                           sector_t *blocks, bool degraded);
        void (*end_sync)(struct mddev *mddev, sector_t offset, sector_t *blocks);
index 5285e72341a2b1f8d48a145af51293aaffe84bc0..672ab226e43c7907c20df9eb4950b18259097d43 100644 (file)
@@ -4097,7 +4097,8 @@ static int handle_stripe_dirtying(struct r5conf *conf,
                                  int disks)
 {
        int rmw = 0, rcw = 0, i;
-       sector_t resync_offset = conf->mddev->resync_offset;
+       struct mddev *mddev = conf->mddev;
+       sector_t resync_offset = mddev->resync_offset;
 
        /* Check whether resync is now happening or should start.
         * If yes, then the array is dirty (after unclean shutdown or
@@ -4116,6 +4117,12 @@ static int handle_stripe_dirtying(struct r5conf *conf,
                pr_debug("force RCW rmw_level=%u, resync_offset=%llu sh->sector=%llu\n",
                         conf->rmw_level, (unsigned long long)resync_offset,
                         (unsigned long long)sh->sector);
+       } else if (mddev->bitmap_ops && mddev->bitmap_ops->blocks_synced &&
+                  !mddev->bitmap_ops->blocks_synced(mddev, sh->sector)) {
+               /* The initial recover is not done, must read everything */
+               rcw = 1; rmw = 2;
+               pr_debug("force RCW by lazy recovery, sh->sector=%llu\n",
+                        sh->sector);
        } else for (i = disks; i--; ) {
                /* would I have to read this buffer for read_modify_write */
                struct r5dev *dev = &sh->dev[i];
@@ -4148,7 +4155,7 @@ static int handle_stripe_dirtying(struct r5conf *conf,
        set_bit(STRIPE_HANDLE, &sh->state);
        if ((rmw < rcw || (rmw == rcw && conf->rmw_level == PARITY_PREFER_RMW)) && rmw > 0) {
                /* prefer read-modify-write, but need to get some data */
-               mddev_add_trace_msg(conf->mddev, "raid5 rmw %llu %d",
+               mddev_add_trace_msg(mddev, "raid5 rmw %llu %d",
                                sh->sector, rmw);
 
                for (i = disks; i--; ) {
@@ -4227,8 +4234,8 @@ static int handle_stripe_dirtying(struct r5conf *conf,
                                        set_bit(STRIPE_DELAYED, &sh->state);
                        }
                }
-               if (rcw && !mddev_is_dm(conf->mddev))
-                       blk_add_trace_msg(conf->mddev->gendisk->queue,
+               if (rcw && !mddev_is_dm(mddev))
+                       blk_add_trace_msg(mddev->gendisk->queue,
                                "raid5 rcw %llu %d %d %d",
                                (unsigned long long)sh->sector, rcw, qread,
                                test_bit(STRIPE_DELAYED, &sh->state));