]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
md/raid1,raid10: don't handle IO error for REQ_RAHEAD and REQ_NOWAIT
author Yu Kuai <yukuai3@huawei.com>
Tue, 27 May 2025 08:14:07 +0000 (16:14 +0800)
committer Yu Kuai <yukuai3@huawei.com>
Fri, 30 May 2025 07:46:45 +0000 (15:46 +0800)
IO with REQ_RAHEAD or REQ_NOWAIT can fail early, even if the storage medium
is fine, hence recording badblocks or removing the disk from the array does
not make sense.

This problem was found by the lvm2 test lvcreate-large-raid, where dm-zero
fails read-ahead IO directly.

Fixes: e879a0d9cb08 ("md/raid1,raid10: don't ignore IO flags")
Reported-and-tested-by: Mikulas Patocka <mpatocka@redhat.com>
Closes: https://lore.kernel.org/all/34fa755d-62c8-4588-8ee1-33cb1249bdf2@redhat.com/
Link: https://lore.kernel.org/linux-raid/20250527081407.3004055-1-yukuai1@huaweicloud.com
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
drivers/md/raid1-10.c
drivers/md/raid1.c
drivers/md/raid10.c

index c7efd8aab675cc6b1ba97ae4af43468c88829cca..b8b3a90697012c610a2dc0e4c7210dcd3a1021b6 100644 (file)
@@ -293,3 +293,13 @@ static inline bool raid1_should_read_first(struct mddev *mddev,
 
        return false;
 }
+
+/*
+ * A bio with REQ_RAHEAD or REQ_NOWAIT can fail at any time, before the IO is
+ * submitted to the underlying disks; return false for such a bio so callers
+ * don't record badblocks or retry in this case.
+ */
+static inline bool raid1_should_handle_error(struct bio *bio)
+{
+       return !(bio->bi_opf & (REQ_RAHEAD | REQ_NOWAIT));
+}
index 657d481525be68fc82bfdb632e660b0b97ce36fb..19c5a0ce5a408f82855718acc47999e2d23f6cfd 100644 (file)
@@ -373,14 +373,16 @@ static void raid1_end_read_request(struct bio *bio)
         */
        update_head_pos(r1_bio->read_disk, r1_bio);
 
-       if (uptodate)
+       if (uptodate) {
                set_bit(R1BIO_Uptodate, &r1_bio->state);
-       else if (test_bit(FailFast, &rdev->flags) &&
-                test_bit(R1BIO_FailFast, &r1_bio->state))
+       } else if (test_bit(FailFast, &rdev->flags) &&
+                  test_bit(R1BIO_FailFast, &r1_bio->state)) {
                /* This was a fail-fast read so we definitely
                 * want to retry */
                ;
-       else {
+       } else if (!raid1_should_handle_error(bio)) {
+               uptodate = 1;
+       } else {
                /* If all other devices have failed, we want to return
                 * the error upwards rather than fail the last device.
                 * Here we redefine "uptodate" to mean "Don't want to retry"
@@ -451,16 +453,15 @@ static void raid1_end_write_request(struct bio *bio)
        struct bio *to_put = NULL;
        int mirror = find_bio_disk(r1_bio, bio);
        struct md_rdev *rdev = conf->mirrors[mirror].rdev;
-       bool discard_error;
        sector_t lo = r1_bio->sector;
        sector_t hi = r1_bio->sector + r1_bio->sectors;
-
-       discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
+       bool ignore_error = !raid1_should_handle_error(bio) ||
+               (bio->bi_status && bio_op(bio) == REQ_OP_DISCARD);
 
        /*
         * 'one mirror IO has finished' event handler:
         */
-       if (bio->bi_status && !discard_error) {
+       if (bio->bi_status && !ignore_error) {
                set_bit(WriteErrorSeen, &rdev->flags);
                if (!test_and_set_bit(WantReplacement, &rdev->flags))
                        set_bit(MD_RECOVERY_NEEDED, &
@@ -511,7 +512,7 @@ static void raid1_end_write_request(struct bio *bio)
 
                /* Maybe we can clear some bad blocks. */
                if (rdev_has_badblock(rdev, r1_bio->sector, r1_bio->sectors) &&
-                   !discard_error) {
+                   !ignore_error) {
                        r1_bio->bios[mirror] = IO_MADE_GOOD;
                        set_bit(R1BIO_MadeGood, &r1_bio->state);
                }
index dce06bf65016fb2164be3bde65f5d0b3da28e217..b74780af4c220db77d1c3d502366faed9fb71590 100644 (file)
@@ -399,6 +399,8 @@ static void raid10_end_read_request(struct bio *bio)
                 * wait for the 'master' bio.
                 */
                set_bit(R10BIO_Uptodate, &r10_bio->state);
+       } else if (!raid1_should_handle_error(bio)) {
+               uptodate = 1;
        } else {
                /* If all other devices that store this block have
                 * failed, we want to return the error upwards rather
@@ -456,9 +458,8 @@ static void raid10_end_write_request(struct bio *bio)
        int slot, repl;
        struct md_rdev *rdev = NULL;
        struct bio *to_put = NULL;
-       bool discard_error;
-
-       discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
+       bool ignore_error = !raid1_should_handle_error(bio) ||
+               (bio->bi_status && bio_op(bio) == REQ_OP_DISCARD);
 
        dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
 
@@ -472,7 +473,7 @@ static void raid10_end_write_request(struct bio *bio)
        /*
         * this branch is our 'one mirror IO has finished' event handler:
         */
-       if (bio->bi_status && !discard_error) {
+       if (bio->bi_status && !ignore_error) {
                if (repl)
                        /* Never record new bad blocks to replacement,
                         * just fail it.
@@ -527,7 +528,7 @@ static void raid10_end_write_request(struct bio *bio)
                /* Maybe we can clear some bad blocks. */
                if (rdev_has_badblock(rdev, r10_bio->devs[slot].addr,
                                      r10_bio->sectors) &&
-                   !discard_error) {
+                   !ignore_error) {
                        bio_put(bio);
                        if (repl)
                                r10_bio->devs[slot].repl_bio = IO_MADE_GOOD;