From stable-bounces@linux.kernel.org Wed May 9 23:24:47 2007
From: NeilBrown <neilb@suse.de>
To: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 10 May 2007 16:22:25 +1000
Message-Id: <1070510062225.20388@suse.de>
Cc: linux-raid@vger.kernel.org, Neil Brown <neilb@suse.de>, stable@kernel.org
Subject: md: Avoid a possibility that a read error can wrongly propagate through md/raid1 to a filesystem.
When a raid1 has only one working drive, we want a read error to
propagate up to the filesystem, as there is no point in failing the
last working drive.
Currently the code performing this check is racy.  If a write and a
read are both submitted to a device on a 2-drive raid1, and the write
fails followed by the read failing, the read will see that there is
only one working drive and will pass the failure up, even though the
one working drive is actually the *other* one.

So, tighten up the locking.
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Chris Wright <chrisw@sous-sol.org>

---
 drivers/md/raid1.c |   33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)
diff .prev/drivers/md/raid1.c ./drivers/md/raid1.c
--- linux-2.6.21.1.orig/drivers/md/raid1.c
+++ linux-2.6.21.1/drivers/md/raid1.c
@@ -271,21 +271,25 @@ static int raid1_end_read_request(struct
 	 */
 	update_head_pos(mirror, r1_bio);
 
-	if (uptodate || (conf->raid_disks - conf->mddev->degraded) <= 1) {
-		/*
-		 * Set R1BIO_Uptodate in our master bio, so that
-		 * we will return a good error code for to the higher
-		 * levels even if IO on some other mirrored buffer fails.
-		 *
-		 * The 'master' represents the composite IO operation to
-		 * user-side. So if something waits for IO, then it will
-		 * wait for the 'master' bio.
-		 */
+	if (uptodate)
 		set_bit(R1BIO_Uptodate, &r1_bio->state);
-
+	else {
+		/* If all other devices have failed, we want to return
+		 * the error upwards rather than fail the last device.
+		 * Here we redefine "uptodate" to mean "Don't want to retry"
+		 */
+		unsigned long flags;
+		spin_lock_irqsave(&conf->device_lock, flags);
+		if (r1_bio->mddev->degraded == conf->raid_disks ||
+		    (r1_bio->mddev->degraded == conf->raid_disks-1 &&
+		     !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags)))
+			uptodate = 1;
+		spin_unlock_irqrestore(&conf->device_lock, flags);
+	}
+
+	if (uptodate)
 		raid_end_bio_io(r1_bio);
-	} else {
+	else {
 		/*
 		 * oops, read error:
 		 */
 		char b[BDEVNAME_SIZE];
@@ -992,13 +996,14 @@ static void error(mddev_t *mddev, mdk_rd
 		unsigned long flags;
 		spin_lock_irqsave(&conf->device_lock, flags);
 		mddev->degraded++;
+		set_bit(Faulty, &rdev->flags);
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 		/*
 		 * if recovery is running, make sure it aborts.
 		 */
 		set_bit(MD_RECOVERY_ERR, &mddev->recovery);
-	}
-	set_bit(Faulty, &rdev->flags);
+	} else
+		set_bit(Faulty, &rdev->flags);
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
 	printk(KERN_ALERT "raid1: Disk failure on %s, disabling device. \n"
 		"	Operation continuing on %d devices\n",