Currently, when a sync read fails and setting badblocks also fails
(the 512-entry limit is exceeded), the rdev is not marked Faulty
immediately. Instead, 'recovery_disabled' is set and non-In_sync rdevs
are removed later. This preserves array availability as long as the
bad regions are never read, but users may still read bad sectors
before the rdev is removed. This is possible because
resync/recovery_offset is incorrectly updated to cover these bad
sectors.
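
For illustration, a minimal user-space model (not kernel code; all
names below are invented) of why setting badblocks can fail: the
per-device table has a fixed capacity, so once it holds 512 ranges,
new bad ranges can no longer be recorded:

	#include <stdbool.h>
	#include <stdio.h>

	#define BB_MAX 512	/* fixed capacity, like the kernel's table */

	struct bb_table {
		long long start[BB_MAX];
		int len[BB_MAX];
		int count;
	};

	/* Crude model of setting a bad range: fails once the table is
	 * full. (The real code also merges adjacent ranges, which only
	 * delays this failure for scattered bad sectors.) */
	static bool bb_set(struct bb_table *t, long long s, int sectors)
	{
		if (t->count >= BB_MAX)
			return false;
		t->start[t->count] = s;
		t->len[t->count] = sectors;
		t->count++;
		return true;
	}

	int main(void)
	{
		static struct bb_table t;
		long long s;

		/* 513 scattered ranges: the last one cannot be recorded */
		for (s = 0; s <= BB_MAX; s++)
			if (!bb_set(&t, s * 16, 8))
				printf("range %lld lost: table full\n", s);
		return 0;
	}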
Once badblocks exceed the 512-entry limit, keeping the disk provides
little benefit while adding complexity; prompt disk replacement
matters more.
Therefore, when setting badblocks fails, call md_error() directly to
mark the rdev Faulty immediately, preventing users from reading bad
sectors.
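
The effect on callers, as a toy user-space model (invented names; not
the kernel API): because the badblocks setter now fails the device
itself, every caller can drop its own "if setting failed, fail the
device" fallback:

	#include <stdbool.h>
	#include <stdio.h>

	#define BB_MAX 512

	struct dev { int bb_count; bool faulty; };

	static void fail_device(struct dev *d)	/* stands in for md_error() */
	{
		d->faulty = true;
	}

	/* After this patch: a failed set marks the device faulty right
	 * here, so no caller can forget to do it. */
	static bool set_badblocks(struct dev *d, long long s, int sectors)
	{
		if (d->bb_count >= BB_MAX) {
			fail_device(d);
			return false;
		}
		d->bb_count++;
		return true;
	}

	int main(void)
	{
		struct dev d = { .bb_count = BB_MAX, .faulty = false };

		/* Old pattern: if (!set_badblocks(...)) fail_device(&d);
		 * New pattern: */
		set_badblocks(&d, 0, 8);
		printf("faulty=%d\n", d.faulty);	/* prints faulty=1 */
		return 0;
	}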
Follow-up patches will clean up the offset update logic and the
'recovery_disabled' handling.
Link: https://lore.kernel.org/linux-raid/20260105110300.1442509-6-linan666@huaweicloud.com
Fixes: 5e5702898e93 ("md/raid10: Handle read errors during recovery better.")
Fixes: 3a9f28a5117e ("md/raid1: improve handling of read failure during recovery.")
Signed-off-by: Li Nan <linan122@huawei.com>
Signed-off-by: Yu Kuai <yukuai@fnnas.com>
---
diff --git a/drivers/md/md.c b/drivers/md/md.c
if (is_new)
s += rdev->new_data_offset;
else
s += rdev->data_offset;
- if (!badblocks_set(&rdev->badblocks, s, sectors, 0))
+ if (!badblocks_set(&rdev->badblocks, s, sectors, 0)) {
+ /*
+ * Mark the disk as Faulty when setting badblocks fails,
+ * otherwise, bad sectors may be read.
+ */
+ md_error(mddev, rdev);
return false;
+ }
/* Make sure they get written out promptly */
if (test_bit(ExternalBbl, &rdev->flags))
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
set_bit(MD_RECOVERY_NEEDED, &
rdev->mddev->recovery);
}
/* need to record an error - either for the block or the device */
- if (!rdev_set_badblocks(rdev, sector, sectors, 0))
- md_error(rdev->mddev, rdev);
+ rdev_set_badblocks(rdev, sector, sectors, 0);
return 0;
}
if (!success) {
/* Cannot read from anywhere - mark it bad */
struct md_rdev *rdev = conf->mirrors[read_disk].rdev;
- if (!rdev_set_badblocks(rdev, sect, s, 0))
- md_error(mddev, rdev);
+ rdev_set_badblocks(rdev, sect, s, 0);
break;
}
/* write it back and re-read */
/*
* Badblocks set failed, disk marked Faulty.
* No further operations needed.
*/
- md_error(mddev, rdev);
bio_put(wbio);
break;
}
if (bio->bi_end_io == NULL)
continue;
if (!bio->bi_status &&
- test_bit(R1BIO_MadeGood, &r1_bio->state)) {
+ test_bit(R1BIO_MadeGood, &r1_bio->state))
rdev_clear_badblocks(rdev, r1_bio->sector, s, 0);
- }
if (bio->bi_status &&
- test_bit(R1BIO_WriteError, &r1_bio->state)) {
- if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0))
- md_error(conf->mddev, rdev);
- }
+ test_bit(R1BIO_WriteError, &r1_bio->state))
+ rdev_set_badblocks(rdev, r1_bio->sector, s, 0);
}
put_buf(r1_bio);
md_done_sync(conf->mddev, s);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
set_bit(MD_RECOVERY_NEEDED,
&rdev->mddev->recovery);
}
/* need to record an error - either for the block or the device */
- if (!rdev_set_badblocks(rdev, sector, sectors, 0))
- md_error(rdev->mddev, rdev);
+ rdev_set_badblocks(rdev, sector, sectors, 0);
return 0;
}
if (!rdev_set_badblocks(
rdev,
r10_bio->devs[slot].addr
+ sect,
s, 0)) {
- md_error(mddev, rdev);
r10_bio->devs[slot].bio
= IO_BLOCKED;
}
/*
* Badblocks set failed, disk marked Faulty.
* No further operations needed.
*/
- md_error(mddev, rdev);
bio_put(wbio);
break;
}
if (r10_bio->devs[m].bio == NULL ||
r10_bio->devs[m].bio->bi_end_io == NULL)
continue;
- if (!r10_bio->devs[m].bio->bi_status) {
+ if (!r10_bio->devs[m].bio->bi_status)
rdev_clear_badblocks(
rdev,
r10_bio->devs[m].addr,
r10_bio->sectors, 0);
- } else {
- if (!rdev_set_badblocks(
- rdev,
- r10_bio->devs[m].addr,
- r10_bio->sectors, 0))
- md_error(conf->mddev, rdev);
- }
+ else
+ rdev_set_badblocks(rdev,
+ r10_bio->devs[m].addr,
+ r10_bio->sectors, 0);
rdev = conf->mirrors[dev].replacement;
if (r10_bio->devs[m].repl_bio == NULL ||
r10_bio->devs[m].repl_bio->bi_end_io == NULL)
continue;
- if (!r10_bio->devs[m].repl_bio->bi_status) {
+ if (!r10_bio->devs[m].repl_bio->bi_status)
rdev_clear_badblocks(
rdev,
r10_bio->devs[m].addr,
r10_bio->sectors, 0);
- } else {
- if (!rdev_set_badblocks(
- rdev,
- r10_bio->devs[m].addr,
- r10_bio->sectors, 0))
- md_error(conf->mddev, rdev);
- }
+ else
+ rdev_set_badblocks(rdev,
+ r10_bio->devs[m].addr,
+ r10_bio->sectors, 0);
}
put_buf(r10_bio);
} else {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
else {
clear_bit(R5_ReadError, &sh->dev[i].flags);
clear_bit(R5_ReWrite, &sh->dev[i].flags);
- if (!(set_bad
- && test_bit(In_sync, &rdev->flags)
- && rdev_set_badblocks(
- rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), 0)))
- md_error(conf->mddev, rdev);
+ if (set_bad && test_bit(In_sync, &rdev->flags))
+ rdev_set_badblocks(rdev, sh->sector,
+ RAID5_STRIPE_SECTORS(conf), 0);
+ else
+ md_error(conf->mddev, rdev);
}
}
rdev_dec_pending(rdev, conf->mddev);
else
rdev = NULL;
if (rdev) {
- if (!rdev_set_badblocks(
- rdev,
- sh->sector,
- RAID5_STRIPE_SECTORS(conf), 0))
- md_error(conf->mddev, rdev);
+ rdev_set_badblocks(rdev,
+ sh->sector,
+ RAID5_STRIPE_SECTORS(conf),
+ 0);
rdev_dec_pending(rdev, conf->mddev);
}
}
if (test_and_clear_bit(R5_WriteError, &dev->flags)) {
/* We own a safe reference to the rdev */
rdev = conf->disks[i].rdev;
- if (!rdev_set_badblocks(rdev, sh->sector,
- RAID5_STRIPE_SECTORS(conf), 0))
- md_error(conf->mddev, rdev);
+ rdev_set_badblocks(rdev, sh->sector,
+ RAID5_STRIPE_SECTORS(conf), 0);
rdev_dec_pending(rdev, conf->mddev);
}
if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {