From 3056e3aec8d8ba61a0710fb78b2d562600aa2ea7 Mon Sep 17 00:00:00 2001
From: Alex Lyakas <alex@zadarastorage.com>
Date: Tue, 4 Jun 2013 20:42:21 +0300
Subject: md/raid1: consider WRITE as successful only if at least one non-Faulty and non-rebuilding drive completed it.

From: Alex Lyakas <alex@zadarastorage.com>

commit 3056e3aec8d8ba61a0710fb78b2d562600aa2ea7 upstream.

Without that fix, the following scenario could happen:

- RAID1 with drives A and B; drive B was freshly added and is rebuilding
- Drive A fails
- A WRITE request arrives at the array.  It is failed by drive A, so
  r1_bio is marked as R1BIO_WriteError, but the rebuilding drive B
  succeeds in writing it, so the same r1_bio is marked as
  R1BIO_Uptodate.
- r1_bio arrives at handle_write_finished; badblocks are disabled, and
  md_error()->error() does nothing because we don't fail the last drive
  of a raid1
- raid_end_bio_io() calls call_bio_endio()
- As a result, in call_bio_endio():
        if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
                clear_bit(BIO_UPTODATE, &bio->bi_flags);
  this code does not clear the BIO_UPTODATE flag, so the whole master
  WRITE is reported back to the upper layer as successful (see the
  sketch below).
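
To make that accounting concrete, here is a minimal user-space model of
the decision (it is not the md code; the struct, its fields and the two
helpers are invented for illustration).  The old rule treats any
completed write as good enough, while the fixed rule only counts a
write completed by an In_sync, non-Faulty member:

    /* Toy model of the Uptodate decision; not the real md code. */
    #include <stdbool.h>
    #include <stdio.h>

    struct mirror {
            bool in_sync;   /* fully recovered member */
            bool faulty;    /* has been kicked out of the array */
            bool write_ok;  /* did this member complete the WRITE? */
    };

    /* Old rule: any successful completion marks the r1_bio Uptodate. */
    static bool uptodate_old(const struct mirror *m, int n)
    {
            for (int i = 0; i < n; i++)
                    if (m[i].write_ok)
                            return true;
            return false;
    }

    /* Fixed rule: only an In_sync, non-Faulty member counts. */
    static bool uptodate_fixed(const struct mirror *m, int n)
    {
            for (int i = 0; i < n; i++)
                    if (m[i].write_ok && m[i].in_sync && !m[i].faulty)
                            return true;
            return false;
    }

    int main(void)
    {
            /* The scenario above: A is In_sync but failed the WRITE,
             * B is still rebuilding but completed it. */
            struct mirror m[] = {
                    { .in_sync = true,  .faulty = false, .write_ok = false }, /* A */
                    { .in_sync = false, .faulty = false, .write_ok = true  }, /* B */
            };

            printf("old rule reports success:   %d\n", uptodate_old(m, 2));
            printf("fixed rule reports success: %d\n", uptodate_fixed(m, 2));
            return 0;
    }

With the old rule the model reports the master WRITE as successful even
though only the rebuilding member holds the data; with the fixed rule it
does not, and the error is propagated instead.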

So we returned success to the upper layer, even though the data had been
written only to the rebuilding drive.  But when we later read the data
back, we do not read from the rebuilding drive, so the data is lost.
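
The read side is why the data cannot be recovered: a member that is not
In_sync is only eligible to serve reads below its recovery_offset
(roughly what raid1's read_balance() checks; exact details vary by
kernel version).  A simplified, stand-alone sketch of that eligibility
test, with invented type and function names:

    /* Toy model of read eligibility; not the kernel's read_balance(). */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct member {
            bool in_sync;
            bool faulty;
            uint64_t recovery_offset;       /* sectors rebuilt so far */
    };

    static bool may_read(const struct member *m, uint64_t sector, uint64_t len)
    {
            if (m->faulty)
                    return false;
            /* A rebuilding member only covers sectors below recovery_offset. */
            if (!m->in_sync && m->recovery_offset < sector + len)
                    return false;
            return true;
    }

    int main(void)
    {
            /* B is still rebuilding and has only reached sector 1024,
             * but the WRITE above landed at, say, sector 4096. */
            struct member b = { .in_sync = false, .faulty = false,
                                .recovery_offset = 1024 };

            printf("may read sector 4096 from B: %d\n", may_read(&b, 4096, 8));
            return 0;
    }

So the only copy of the data sits on a member the read path will not
select, which is the silent data loss described above.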

[neilb - applied identical change to raid10 as well]

This bug can result in lost data, so it is suitable for any
-stable kernel.

Signed-off-by: Alex Lyakas <alex@zadarastorage.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 drivers/md/raid1.c  |   12 +++++++++++-
 drivers/md/raid10.c |   12 +++++++++++-
 2 files changed, 22 insertions(+), 2 deletions(-)

--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -413,7 +413,17 @@ static void raid1_end_write_request(stru

 		r1_bio->bios[mirror] = NULL;
 		to_put = bio;
-		set_bit(R1BIO_Uptodate, &r1_bio->state);
+		/*
+		 * Do not set R1BIO_Uptodate if the current device is
+		 * rebuilding or Faulty.  This is because we cannot use
+		 * such a device for properly reading the data back (we
+		 * could potentially use it if the current write fell
+		 * before rdev->recovery_offset, but for simplicity we
+		 * don't check this here).
+		 */
+		if (test_bit(In_sync, &conf->mirrors[mirror].rdev->flags) &&
+		    !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags))
+			set_bit(R1BIO_Uptodate, &r1_bio->state);

 		/* Maybe we can clear some bad blocks. */
 		if (is_badblock(conf->mirrors[mirror].rdev,
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -452,7 +452,17 @@ static void raid10_end_write_request(str
 		sector_t first_bad;
 		int bad_sectors;

-		set_bit(R10BIO_Uptodate, &r10_bio->state);
+		/*
+		 * Do not set R10BIO_Uptodate if the current device is
+		 * rebuilding or Faulty.  This is because we cannot use
+		 * such a device for properly reading the data back (we
+		 * could potentially use it if the current write fell
+		 * before rdev->recovery_offset, but for simplicity we
+		 * don't check this here).
+		 */
+		if (test_bit(In_sync, &rdev->flags) &&
+		    !test_bit(Faulty, &rdev->flags))
+			set_bit(R10BIO_Uptodate, &r10_bio->state);

 		/* Maybe we can clear some bad blocks. */
 		if (is_badblock(rdev,