]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
md: add a new recovery_flag MD_RECOVERY_LAZY_RECOVER
author: Yu Kuai <yukuai3@huawei.com>
Fri, 29 Aug 2025 08:04:24 +0000 (16:04 +0800)
committer: Yu Kuai <yukuai3@huawei.com>
Sat, 6 Sep 2025 09:20:32 +0000 (17:20 +0800)
This flag is used by llbitmap in later patches to skip the raid456 initial
recovery and delay building the initial xor data until the first write.

https://lore.kernel.org/linux-raid/20250829080426.1441678-10-yukuai1@huaweicloud.com
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
drivers/md/md.c
drivers/md/md.h
drivers/md/raid5.c

index ec80c8d6cbcb71e1fbc001847832752c6ba248a7..f5d00253798909029b948e7533c25f8830f0a6b4 100644 (file)
@@ -9199,6 +9199,39 @@ static sector_t md_sync_max_sectors(struct mddev *mddev,
        }
 }
 
+/*
+ * If lazy recovery is requested and all rdevs are in sync, clear In_sync on the
+ * rdev with the highest raid_disk so that recovery builds the initial xor data,
+ * same as old bitmap. Returns true only if such an rdev was selected.
+ */
+static bool mddev_select_lazy_recover_rdev(struct mddev *mddev)
+{
+       struct md_rdev *recover_rdev = NULL;
+       struct md_rdev *rdev;
+
+       rcu_read_lock();
+       rdev_for_each_rcu(rdev, mddev) {
+               if (rdev->raid_disk < 0)
+                       continue;
+
+               if (test_bit(Faulty, &rdev->flags) ||
+                   !test_bit(In_sync, &rdev->flags)) {
+                       /* Not all in sync: forget any earlier candidate. */
+                       recover_rdev = NULL;
+                       break;
+               }
+
+               if (!recover_rdev || recover_rdev->raid_disk < rdev->raid_disk)
+                       recover_rdev = rdev;
+       }
+
+       if (recover_rdev)
+               clear_bit(In_sync, &recover_rdev->flags);
+
+       rcu_read_unlock();
+       return recover_rdev != NULL;
+}
+
 static sector_t md_sync_position(struct mddev *mddev, enum sync_action action)
 {
        sector_t start = 0;
@@ -9230,6 +9263,14 @@ static sector_t md_sync_position(struct mddev *mddev, enum sync_action action)
                                start = rdev->recovery_offset;
                rcu_read_unlock();
 
+               /*
+                * If there are no spares, and raid456 lazy initial recover is
+                * requested.
+                */
+               if (test_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery) &&
+                   start == MaxSector && mddev_select_lazy_recover_rdev(mddev))
+                       start = 0;
+
                /* If there is a bitmap, we need to make sure all
                 * writes that started before we added a spare
                 * complete before we start doing a recovery.
@@ -9791,6 +9832,7 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
 
                set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
                clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+               clear_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery);
                return true;
        }
 
@@ -9799,6 +9841,7 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
                remove_spares(mddev, NULL);
                set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
                clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+               clear_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery);
                return true;
        }
 
@@ -9808,7 +9851,7 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
         * re-add.
         */
        *spares = remove_and_add_spares(mddev, NULL);
-       if (*spares) {
+       if (*spares || test_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery)) {
                clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
                clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
                clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
@@ -10021,6 +10064,7 @@ void md_check_recovery(struct mddev *mddev)
                        }
 
                        clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+                       clear_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery);
                        clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
                        clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
 
@@ -10131,6 +10175,7 @@ void md_reap_sync_thread(struct mddev *mddev)
        clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
        clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
        clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+       clear_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery);
        /*
         * We call mddev->cluster_ops->update_size here because sync_size could
         * be changed by md_update_sb, and MD_RECOVERY_RESHAPE is cleared,
index 4fa5a3e68a0c22f832a33a3d67138a70690cbb28..7b6357879a84d72e0487ab5e2a5087d7fe07e50c 100644 (file)
@@ -667,6 +667,8 @@ enum recovery_flags {
        MD_RECOVERY_RESHAPE,
        /* remote node is running resync thread */
        MD_RESYNCING_REMOTE,
+       /* raid456 lazy initial recover */
+       MD_RECOVERY_LAZY_RECOVER,
 };
 
 enum md_ro_state {
index 672ab226e43c7907c20df9eb4950b18259097d43..5112658ef5f68bc2a18fc7e7994991d4ffef995f 100644 (file)
@@ -4705,10 +4705,21 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
                        }
                } else if (test_bit(In_sync, &rdev->flags))
                        set_bit(R5_Insync, &dev->flags);
-               else if (sh->sector + RAID5_STRIPE_SECTORS(conf) <= rdev->recovery_offset)
-                       /* in sync if before recovery_offset */
-                       set_bit(R5_Insync, &dev->flags);
-               else if (test_bit(R5_UPTODATE, &dev->flags) &&
+               else if (sh->sector + RAID5_STRIPE_SECTORS(conf) <=
+                        rdev->recovery_offset) {
+                       /*
+                        * in sync if:
+                        *  - normal IO, or
+                        *  - resync IO that is not lazy recovery
+                        *
+                        * For lazy recovery, we have to mark the rdev without
+                        * In_sync as failed, to build initial xor data.
+                        */
+                       if (!test_bit(STRIPE_SYNCING, &sh->state) ||
+                           !test_bit(MD_RECOVERY_LAZY_RECOVER,
+                                     &conf->mddev->recovery))
+                               set_bit(R5_Insync, &dev->flags);
+               } else if (test_bit(R5_UPTODATE, &dev->flags) &&
                         test_bit(R5_Expanded, &dev->flags))
                        /* If we've reshaped into here, we assume it is Insync.
                         * We will shortly update recovery_offset to make