]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.6-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 30 Nov 2012 02:21:21 +0000 (18:21 -0800)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 30 Nov 2012 02:21:21 +0000 (18:21 -0800)
added patches:
md-raid10-close-race-that-lose-writes-lost-when-replacement-completes.patch

queue-3.6/md-raid10-close-race-that-lose-writes-lost-when-replacement-completes.patch [new file with mode: 0644]
queue-3.6/series

diff --git a/queue-3.6/md-raid10-close-race-that-lose-writes-lost-when-replacement-completes.patch b/queue-3.6/md-raid10-close-race-that-lose-writes-lost-when-replacement-completes.patch
new file mode 100644 (file)
index 0000000..2b54164
--- /dev/null
@@ -0,0 +1,184 @@
+From e7c0c3fa29280d62aa5e11101a674bb3064bd791 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.de>
+Date: Thu, 22 Nov 2012 14:42:49 +1100
+Subject: md/raid10: close race that lose writes lost when replacement completes.
+
+From: NeilBrown <neilb@suse.de>
+
+commit e7c0c3fa29280d62aa5e11101a674bb3064bd791 upstream.
+
+When a replacement operation completes there is a small window
+when the original device is marked 'faulty' and the replacement
+still looks like a replacement.  The faulty device should be removed and
+the replacement moved into place very quickly, but it isn't instant.
+
+So the code that writes out to the array must handle the possibility that
+the only working device for some slot is the replacement - but it
+doesn't.  If the primary device is faulty it just gives up.  This
+can lead to corruption.
+
+So make the code more robust: if either the primary or the
+replacement is present and working, write to them.  Only when
+neither is present do we give up.
+
+This bug has been present since replacement was introduced in
+3.3, so it is suitable for any -stable kernel since then.
+
+Reported-by: "George Spelvin" <linux@horizon.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: George Spelvin <linux@horizon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/raid10.c |  114 ++++++++++++++++++++++++++--------------------------
+ 1 file changed, 59 insertions(+), 55 deletions(-)
+
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -1287,18 +1287,21 @@ retry_write:
+                       blocked_rdev = rrdev;
+                       break;
+               }
++              if (rdev && (test_bit(Faulty, &rdev->flags)
++                           || test_bit(Unmerged, &rdev->flags)))
++                      rdev = NULL;
+               if (rrdev && (test_bit(Faulty, &rrdev->flags)
+                             || test_bit(Unmerged, &rrdev->flags)))
+                       rrdev = NULL;
+               r10_bio->devs[i].bio = NULL;
+               r10_bio->devs[i].repl_bio = NULL;
+-              if (!rdev || test_bit(Faulty, &rdev->flags) ||
+-                  test_bit(Unmerged, &rdev->flags)) {
++
++              if (!rdev && !rrdev) {
+                       set_bit(R10BIO_Degraded, &r10_bio->state);
+                       continue;
+               }
+-              if (test_bit(WriteErrorSeen, &rdev->flags)) {
++              if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
+                       sector_t first_bad;
+                       sector_t dev_sector = r10_bio->devs[i].addr;
+                       int bad_sectors;
+@@ -1340,8 +1343,10 @@ retry_write:
+                                       max_sectors = good_sectors;
+                       }
+               }
+-              r10_bio->devs[i].bio = bio;
+-              atomic_inc(&rdev->nr_pending);
++              if (rdev) {
++                      r10_bio->devs[i].bio = bio;
++                      atomic_inc(&rdev->nr_pending);
++              }
+               if (rrdev) {
+                       r10_bio->devs[i].repl_bio = bio;
+                       atomic_inc(&rrdev->nr_pending);
+@@ -1397,58 +1402,57 @@ retry_write:
+       for (i = 0; i < conf->copies; i++) {
+               struct bio *mbio;
+               int d = r10_bio->devs[i].devnum;
+-              if (!r10_bio->devs[i].bio)
+-                      continue;
+-              mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+-              md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
+-                          max_sectors);
+-              r10_bio->devs[i].bio = mbio;
+-
+-              mbio->bi_sector = (r10_bio->devs[i].addr+
+-                                 choose_data_offset(r10_bio,
+-                                                    conf->mirrors[d].rdev));
+-              mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
+-              mbio->bi_end_io = raid10_end_write_request;
+-              mbio->bi_rw = WRITE | do_sync | do_fua;
+-              mbio->bi_private = r10_bio;
+-
+-              atomic_inc(&r10_bio->remaining);
+-              spin_lock_irqsave(&conf->device_lock, flags);
+-              bio_list_add(&conf->pending_bio_list, mbio);
+-              conf->pending_count++;
+-              spin_unlock_irqrestore(&conf->device_lock, flags);
+-              if (!mddev_check_plugged(mddev))
+-                      md_wakeup_thread(mddev->thread);
+-
+-              if (!r10_bio->devs[i].repl_bio)
+-                      continue;
++              if (r10_bio->devs[i].bio) {
++                      struct md_rdev *rdev = conf->mirrors[d].rdev;
++                      mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
++                      md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
++                                  max_sectors);
++                      r10_bio->devs[i].bio = mbio;
++
++                      mbio->bi_sector = (r10_bio->devs[i].addr +
++                                         choose_data_offset(r10_bio, rdev));
++                      mbio->bi_bdev = rdev->bdev;
++                      mbio->bi_end_io = raid10_end_write_request;
++                      mbio->bi_rw = WRITE | do_sync | do_fua;
++                      mbio->bi_private = r10_bio;
++
++                      atomic_inc(&r10_bio->remaining);
++                      spin_lock_irqsave(&conf->device_lock, flags);
++                      bio_list_add(&conf->pending_bio_list, mbio);
++                      conf->pending_count++;
++                      spin_unlock_irqrestore(&conf->device_lock, flags);
++                      if (!mddev_check_plugged(mddev))
++                              md_wakeup_thread(mddev->thread);
++              }
+-              mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+-              md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
+-                          max_sectors);
+-              r10_bio->devs[i].repl_bio = mbio;
+-
+-              /* We are actively writing to the original device
+-               * so it cannot disappear, so the replacement cannot
+-               * become NULL here
+-               */
+-              mbio->bi_sector = (r10_bio->devs[i].addr +
+-                                 choose_data_offset(
+-                                         r10_bio,
+-                                         conf->mirrors[d].replacement));
+-              mbio->bi_bdev = conf->mirrors[d].replacement->bdev;
+-              mbio->bi_end_io = raid10_end_write_request;
+-              mbio->bi_rw = WRITE | do_sync | do_fua;
+-              mbio->bi_private = r10_bio;
+-
+-              atomic_inc(&r10_bio->remaining);
+-              spin_lock_irqsave(&conf->device_lock, flags);
+-              bio_list_add(&conf->pending_bio_list, mbio);
+-              conf->pending_count++;
+-              spin_unlock_irqrestore(&conf->device_lock, flags);
+-              if (!mddev_check_plugged(mddev))
+-                      md_wakeup_thread(mddev->thread);
++              if (r10_bio->devs[i].repl_bio) {
++                      struct md_rdev *rdev = conf->mirrors[d].replacement;
++                      if (rdev == NULL) {
++                              /* Replacement just got moved to main 'rdev' */
++                              smp_mb();
++                              rdev = conf->mirrors[d].rdev;
++                      }
++                      mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
++                      md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
++                                  max_sectors);
++                      r10_bio->devs[i].repl_bio = mbio;
++
++                      mbio->bi_sector = (r10_bio->devs[i].addr +
++                                         choose_data_offset(r10_bio, rdev));
++                      mbio->bi_bdev = rdev->bdev;
++                      mbio->bi_end_io = raid10_end_write_request;
++                      mbio->bi_rw = WRITE | do_sync | do_fua;
++                      mbio->bi_private = r10_bio;
++
++                      atomic_inc(&r10_bio->remaining);
++                      spin_lock_irqsave(&conf->device_lock, flags);
++                      bio_list_add(&conf->pending_bio_list, mbio);
++                      conf->pending_count++;
++                      spin_unlock_irqrestore(&conf->device_lock, flags);
++                      if (!mddev_check_plugged(mddev))
++                              md_wakeup_thread(mddev->thread);
++              }
+       }
+       /* Don't remove the bias on 'remaining' (one_write_done) until
index f8b4c6501616320f0b99d59700482c59202f90e6..707a6bdaf54e658d782ec318fe134b7a521d1cc2 100644 (file)
@@ -51,3 +51,4 @@ mpi-fix-compilation-on-mips-with-gcc-4.4-and-newer.patch
 ext4-remove-erroneous-ext4_superblock_csum_set-in-update_backups.patch
 powerpc-eeh-lock-module-while-handling-eeh-event.patch
 mmc-sdhci-s3c-fix-the-wrong-number-of-max-bus-clocks.patch
+md-raid10-close-race-that-lose-writes-lost-when-replacement-completes.patch