]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 22 Jan 2014 22:07:28 +0000 (14:07 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 22 Jan 2014 22:07:28 +0000 (14:07 -0800)
added patches:
md-raid10-fix-bug-when-raid10-recovery-fails-to-recover-a-block.patch
md-raid10-fix-two-bugs-in-handling-of-known-bad-blocks.patch
md-raid5-fix-possible-confusion-when-multiple-write-errors-occur.patch
nilfs2-fix-segctor-bug-that-causes-file-system-corruption.patch
serial-amba-pl011-use-port-lock-to-guard-control-register-access.patch

queue-3.4/md-raid10-fix-bug-when-raid10-recovery-fails-to-recover-a-block.patch [new file with mode: 0644]
queue-3.4/md-raid10-fix-two-bugs-in-handling-of-known-bad-blocks.patch [new file with mode: 0644]
queue-3.4/md-raid5-fix-possible-confusion-when-multiple-write-errors-occur.patch [new file with mode: 0644]
queue-3.4/nilfs2-fix-segctor-bug-that-causes-file-system-corruption.patch [new file with mode: 0644]
queue-3.4/serial-amba-pl011-use-port-lock-to-guard-control-register-access.patch [new file with mode: 0644]
queue-3.4/series

diff --git a/queue-3.4/md-raid10-fix-bug-when-raid10-recovery-fails-to-recover-a-block.patch b/queue-3.4/md-raid10-fix-bug-when-raid10-recovery-fails-to-recover-a-block.patch
new file mode 100644 (file)
index 0000000..7d85f17
--- /dev/null
@@ -0,0 +1,54 @@
+From e8b849158508565e0cd6bc80061124afc5879160 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.de>
+Date: Mon, 6 Jan 2014 10:35:34 +1100
+Subject: md/raid10: fix bug when raid10 recovery fails to recover a block.
+
+From: NeilBrown <neilb@suse.de>
+
+commit e8b849158508565e0cd6bc80061124afc5879160 upstream.
+
+commit e875ecea266a543e643b19e44cf472f1412708f9
+    md/raid10 record bad blocks as needed during recovery.
+
+added code to the "cannot recover this block" path to record a bad
+block rather than fail the whole recovery.
+Unfortunately this new case was placed *after* r10bio was freed rather
+than *before*, yet it still uses r10bio.
+This will crash with a null dereference.
+
+So move the freeing of r10bio down where it is safe.
+
+Fixes: e875ecea266a543e643b19e44cf472f1412708f9
+Reported-by: Damian Nowak <spam@nowaker.net>
+URL: https://bugzilla.kernel.org/show_bug.cgi?id=68181
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/raid10.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -2943,10 +2943,6 @@ static sector_t sync_request(struct mdde
+                       if (j == conf->copies) {
+                               /* Cannot recover, so abort the recovery or
+                                * record a bad block */
+-                              put_buf(r10_bio);
+-                              if (rb2)
+-                                      atomic_dec(&rb2->remaining);
+-                              r10_bio = rb2;
+                               if (any_working) {
+                                       /* problem is that there are bad blocks
+                                        * on other device(s)
+@@ -2978,6 +2974,10 @@ static sector_t sync_request(struct mdde
+                                       mirror->recovery_disabled
+                                               = mddev->recovery_disabled;
+                               }
++                              put_buf(r10_bio);
++                              if (rb2)
++                                      atomic_dec(&rb2->remaining);
++                              r10_bio = rb2;
+                               break;
+                       }
+               }
diff --git a/queue-3.4/md-raid10-fix-two-bugs-in-handling-of-known-bad-blocks.patch b/queue-3.4/md-raid10-fix-two-bugs-in-handling-of-known-bad-blocks.patch
new file mode 100644 (file)
index 0000000..7127408
--- /dev/null
@@ -0,0 +1,50 @@
+From b50c259e25d9260b9108dc0c2964c26e5ecbe1c1 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.de>
+Date: Tue, 14 Jan 2014 10:38:09 +1100
+Subject: md/raid10: fix two bugs in handling of known-bad-blocks.
+
+From: NeilBrown <neilb@suse.de>
+
+commit b50c259e25d9260b9108dc0c2964c26e5ecbe1c1 upstream.
+
+If we discover a bad block when reading we split the request and
+potentially read some of it from a different device.
+
+The code path of this has two bugs in RAID10.
+1/ we get a spin_lock with _irq, but unlock without _irq!!
+2/ The calculation of 'sectors_handled' is wrong, as can be clearly
+   seen by comparison with raid1.c
+
+This leads to at least 2 warnings and a probable crash if a RAID10
+ever has known bad blocks.
+
+Fixes: 856e08e23762dfb92ffc68fd0a8d228f9e152160
+Reported-by: Damian Nowak <spam@nowaker.net>
+URL: https://bugzilla.kernel.org/show_bug.cgi?id=68181
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/raid10.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -1117,7 +1117,7 @@ read_again:
+                       /* Could not read all from this device, so we will
+                        * need another r10_bio.
+                        */
+-                      sectors_handled = (r10_bio->sectors + max_sectors
++                      sectors_handled = (r10_bio->sector + max_sectors
+                                          - bio->bi_sector);
+                       r10_bio->sectors = max_sectors;
+                       spin_lock_irq(&conf->device_lock);
+@@ -1125,7 +1125,7 @@ read_again:
+                               bio->bi_phys_segments = 2;
+                       else
+                               bio->bi_phys_segments++;
+-                      spin_unlock(&conf->device_lock);
++                      spin_unlock_irq(&conf->device_lock);
+                       /* Cannot call generic_make_request directly
+                        * as that will be queued in __generic_make_request
+                        * and subsequent mempool_alloc might block
diff --git a/queue-3.4/md-raid5-fix-possible-confusion-when-multiple-write-errors-occur.patch b/queue-3.4/md-raid5-fix-possible-confusion-when-multiple-write-errors-occur.patch
new file mode 100644 (file)
index 0000000..408b22c
--- /dev/null
@@ -0,0 +1,53 @@
+From 1cc03eb93245e63b0b7a7832165efdc52e25b4e6 Mon Sep 17 00:00:00 2001
+From: NeilBrown <neilb@suse.de>
+Date: Mon, 6 Jan 2014 13:19:42 +1100
+Subject: md/raid5: Fix possible confusion when multiple write errors occur.
+
+From: NeilBrown <neilb@suse.de>
+
+commit 1cc03eb93245e63b0b7a7832165efdc52e25b4e6 upstream.
+
+commit 5d8c71f9e5fbdd95650be00294d238e27a363b5c
+    md: raid5 crash during degradation
+
+Fixed a crash in an overly simplistic way which could leave
+R5_WriteError or R5_MadeGood set in the stripe cache for devices
+for which it is no longer relevant.
+When those devices are removed and spares added the flags are still
+set and can cause incorrect behaviour.
+
+commit 14a75d3e07c784c004b4b44b34af996b8e4ac453
+    md/raid5: preferentially read from replacement device if possible.
+
+Fixed the same bug in a more effective way, so we can now revert
+the original commit.
+
+Reported-and-tested-by: Alexander Lyakas <alex.bolshoy@gmail.com>
+Fixes: 5d8c71f9e5fbdd95650be00294d238e27a363b5c
+Signed-off-by: NeilBrown <neilb@suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/md/raid5.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/md/raid5.c
++++ b/drivers/md/raid5.c
+@@ -3238,7 +3238,7 @@ static void analyse_stripe(struct stripe
+                        */
+                       set_bit(R5_Insync, &dev->flags);
+-              if (rdev && test_bit(R5_WriteError, &dev->flags)) {
++              if (test_bit(R5_WriteError, &dev->flags)) {
+                       /* This flag does not apply to '.replacement'
+                        * only to .rdev, so make sure to check that*/
+                       struct md_rdev *rdev2 = rcu_dereference(
+@@ -3251,7 +3251,7 @@ static void analyse_stripe(struct stripe
+                       } else
+                               clear_bit(R5_WriteError, &dev->flags);
+               }
+-              if (rdev && test_bit(R5_MadeGood, &dev->flags)) {
++              if (test_bit(R5_MadeGood, &dev->flags)) {
+                       /* This flag does not apply to '.replacement'
+                        * only to .rdev, so make sure to check that*/
+                       struct md_rdev *rdev2 = rcu_dereference(
diff --git a/queue-3.4/nilfs2-fix-segctor-bug-that-causes-file-system-corruption.patch b/queue-3.4/nilfs2-fix-segctor-bug-that-causes-file-system-corruption.patch
new file mode 100644 (file)
index 0000000..24fedcd
--- /dev/null
@@ -0,0 +1,86 @@
+From 70f2fe3a26248724d8a5019681a869abdaf3e89a Mon Sep 17 00:00:00 2001
+From: Andreas Rohner <andreas.rohner@gmx.net>
+Date: Tue, 14 Jan 2014 17:56:36 -0800
+Subject: nilfs2: fix segctor bug that causes file system corruption
+
+From: Andreas Rohner <andreas.rohner@gmx.net>
+
+commit 70f2fe3a26248724d8a5019681a869abdaf3e89a upstream.
+
+There is a bug in the function nilfs_segctor_collect, which results in
+active data being written to a segment that is marked as clean.  It is
+possible that this segment is selected for a later segment
+construction, whereby the old data is overwritten.
+
+The problem shows itself with the following kernel log message:
+
+  nilfs_sufile_do_cancel_free: segment 6533 must be clean
+
+Usually a few hours later the file system gets corrupted:
+
+  NILFS: bad btree node (blocknr=8748107): level = 0, flags = 0x0, nchildren = 0
+  NILFS error (device sdc1): nilfs_bmap_last_key: broken bmap (inode number=114660)
+
+The issue can be reproduced with a file system that is nearly full and
+with the cleaner running, while some IO intensive task is running,
+although it is quite hard to reproduce.
+
+This is what happens:
+
+ 1. The cleaner starts the segment construction
+ 2. nilfs_segctor_collect is called
+ 3. sc_stage is on NILFS_ST_SUFILE and segments are freed
+ 4. sc_stage is on NILFS_ST_DAT current segment is full
+ 5. nilfs_segctor_extend_segments is called, which
+    allocates a new segment
+ 6. The new segment is one of the segments freed in step 3
+ 7. nilfs_sufile_cancel_freev is called and produces an error message
+ 8. Loop around and the collection starts again
+ 9. sc_stage is on NILFS_ST_SUFILE and segments are freed
+    including the newly allocated segment, which will contain active
+    data and can be allocated at a later time
+10. A few hours later another segment construction allocates the
+    segment and causes file system corruption
+
+This can be prevented by simply reordering the statements.  If
+nilfs_sufile_cancel_freev is called before nilfs_segctor_extend_segments
+the freed segments are marked as dirty and cannot be allocated any more.
+
+Signed-off-by: Andreas Rohner <andreas.rohner@gmx.net>
+Reviewed-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
+Tested-by: Andreas Rohner <andreas.rohner@gmx.net>
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/nilfs2/segment.c |   10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+--- a/fs/nilfs2/segment.c
++++ b/fs/nilfs2/segment.c
+@@ -1436,17 +1436,19 @@ static int nilfs_segctor_collect(struct
+               nilfs_clear_logs(&sci->sc_segbufs);
+-              err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
+-              if (unlikely(err))
+-                      return err;
+-
+               if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
+                       err = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
+                                                       sci->sc_freesegs,
+                                                       sci->sc_nfreesegs,
+                                                       NULL);
+                       WARN_ON(err); /* do not happen */
++                      sci->sc_stage.flags &= ~NILFS_CF_SUFREED;
+               }
++
++              err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
++              if (unlikely(err))
++                      return err;
++
+               nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
+               sci->sc_stage = prev_stage;
+       }
diff --git a/queue-3.4/serial-amba-pl011-use-port-lock-to-guard-control-register-access.patch b/queue-3.4/serial-amba-pl011-use-port-lock-to-guard-control-register-access.patch
new file mode 100644 (file)
index 0000000..9869451
--- /dev/null
@@ -0,0 +1,71 @@
+From fe43390702a1b5741fdf217063b05c7612b38303 Mon Sep 17 00:00:00 2001
+From: Jon Medhurst <tixy@linaro.org>
+Date: Tue, 10 Dec 2013 10:18:58 +0000
+Subject: serial: amba-pl011: use port lock to guard control register access
+
+From: Jon Medhurst <tixy@linaro.org>
+
+commit fe43390702a1b5741fdf217063b05c7612b38303 upstream.
+
+When the pl011 is being used for a console, pl011_console_write forces
+the control register (CR) to enable the UART for transmission and then
+restores this to the original value afterwards. It does this while
+holding the port lock.
+
+Unfortunately, when the uart is started or shutdown - say in response to
+userland using the serial device for a terminal - then this updates the
+control register without any locking.
+
+This means we can have
+
+  pl011_console_write   Save CR
+  pl011_startup         Initialise CR, e.g. enable receive
+  pl011_console_write   Restore old CR with receive not enabled
+
+The result is a serial port which doesn't respond to any input.
+
+A similar race in reverse could happen when the device is shutdown.
+
+We can fix these problems by taking the port lock when updating CR.
+
+Signed-off-by: Jon Medhurst <tixy@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/tty/serial/amba-pl011.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/tty/serial/amba-pl011.c
++++ b/drivers/tty/serial/amba-pl011.c
+@@ -1413,6 +1413,8 @@ static int pl011_startup(struct uart_por
+       /*
+        * Provoke TX FIFO interrupt into asserting.
+        */
++      spin_lock_irq(&uap->port.lock);
++
+       cr = UART01x_CR_UARTEN | UART011_CR_TXE | UART011_CR_LBE;
+       writew(cr, uap->port.membase + UART011_CR);
+       writew(0, uap->port.membase + UART011_FBRD);
+@@ -1437,6 +1439,8 @@ static int pl011_startup(struct uart_por
+       cr |= UART01x_CR_UARTEN | UART011_CR_RXE | UART011_CR_TXE;
+       writew(cr, uap->port.membase + UART011_CR);
++      spin_unlock_irq(&uap->port.lock);
++
+       /*
+        * initialise the old status of the modem signals
+        */
+@@ -1516,11 +1520,13 @@ static void pl011_shutdown(struct uart_p
+        * it during startup().
+        */
+       uap->autorts = false;
++      spin_lock_irq(&uap->port.lock);
+       cr = readw(uap->port.membase + UART011_CR);
+       uap->old_cr = cr;
+       cr &= UART011_CR_RTS | UART011_CR_DTR;
+       cr |= UART01x_CR_UARTEN | UART011_CR_TXE;
+       writew(cr, uap->port.membase + UART011_CR);
++      spin_unlock_irq(&uap->port.lock);
+       /*
+        * disable break condition and fifos
index 7f369a42595127334779113e2a58e46f50e7850c..a8bc94840788e16aa6f17e02d3c0fae6ecc0ed21 100644 (file)
@@ -4,3 +4,8 @@ perf-x86-amd-ibs-fix-waking-up-from-s3-for-amd-family-10h.patch
 mm-memory-failure.c-recheck-pagehuge-after-hugetlb-page-migrate-successfully.patch
 hwmon-coretemp-fix-truncated-name-of-alarm-attributes.patch
 selinux-fix-possible-null-pointer-dereference-in-selinux_inode_permission.patch
+nilfs2-fix-segctor-bug-that-causes-file-system-corruption.patch
+md-raid10-fix-bug-when-raid10-recovery-fails-to-recover-a-block.patch
+md-raid10-fix-two-bugs-in-handling-of-known-bad-blocks.patch
+md-raid5-fix-possible-confusion-when-multiple-write-errors-occur.patch
+serial-amba-pl011-use-port-lock-to-guard-control-register-access.patch