]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
md: allow removing faulty rdev during resync
author Zheng Qixing <zhengqixing@huawei.com>
Mon, 7 Jul 2025 07:54:12 +0000 (15:54 +0800)
committer Yu Kuai <yukuai3@huawei.com>
Sat, 12 Jul 2025 09:55:20 +0000 (17:55 +0800)
During RAID resync, a faulty rdev cannot be removed: attempting hot
removal fails with a "Device or resource busy" error.

Reproduction steps:
  mdadm -Cv /dev/md0 -l1 -n3 -e1.2 /dev/sd{b..d}
  mdadm /dev/md0 -f /dev/sdb
  mdadm /dev/md0 -r /dev/sdb
  -> mdadm: hot remove failed for /dev/sdb: Device or resource busy

After commit 4b10a3bc67c1 ("md: ensure resync is prioritized over
recovery"), when a device becomes faulty during resync, the
md_choose_sync_action() function returns early without calling
remove_and_add_spares(), preventing faulty device removal.

This patch extracts a helper function remove_spares() to support
removing faulty devices during RAID resync operations.

Fixes: 4b10a3bc67c1 ("md: ensure resync is prioritized over recovery")
Signed-off-by: Zheng Qixing <zhengqixing@huawei.com>
Reviewed-by: Li Nan <linan122@huawei.com>
Link: https://lore.kernel.org/linux-raid/20250707075412.150301-1-zhengqixing@huaweicloud.com
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
drivers/md/md.c

index 7ae91155f2e4f9b59e6eeb53326feb9454c72805..046fe85c76fe18de7370f8660b56048b5a880394 100644 (file)
@@ -9459,17 +9459,11 @@ static bool md_spares_need_change(struct mddev *mddev)
        return false;
 }
 
-static int remove_and_add_spares(struct mddev *mddev,
-                                struct md_rdev *this)
+static int remove_spares(struct mddev *mddev, struct md_rdev *this)
 {
        struct md_rdev *rdev;
-       int spares = 0;
        int removed = 0;
 
-       if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
-               /* Mustn't remove devices when resync thread is running */
-               return 0;
-
        rdev_for_each(rdev, mddev) {
                if ((this == NULL || rdev == this) && rdev_removeable(rdev) &&
                    !mddev->pers->hot_remove_disk(mddev, rdev)) {
@@ -9483,6 +9477,21 @@ static int remove_and_add_spares(struct mddev *mddev,
        if (removed && mddev->kobj.sd)
                sysfs_notify_dirent_safe(mddev->sysfs_degraded);
 
+       return removed;
+}
+
+static int remove_and_add_spares(struct mddev *mddev,
+                                struct md_rdev *this)
+{
+       struct md_rdev *rdev;
+       int spares = 0;
+       int removed = 0;
+
+       if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+               /* Mustn't remove devices when resync thread is running */
+               return 0;
+
+       removed = remove_spares(mddev, this);
        if (this && removed)
                goto no_add;
 
@@ -9525,6 +9534,7 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
 
        /* Check if resync is in progress. */
        if (mddev->recovery_cp < MaxSector) {
+               remove_spares(mddev, NULL);
                set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
                clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
                return true;