From: Greg Kroah-Hartman Date: Wed, 22 Jan 2014 22:07:28 +0000 (-0800) Subject: 3.4-stable patches X-Git-Tag: v3.10.28~4 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=bf0e9d0e1fd32eb8a18b355b4a8649bbcd4f0e11;p=thirdparty%2Fkernel%2Fstable-queue.git 3.4-stable patches added patches: md-raid10-fix-bug-when-raid10-recovery-fails-to-recover-a-block.patch md-raid10-fix-two-bugs-in-handling-of-known-bad-blocks.patch md-raid5-fix-possible-confusion-when-multiple-write-errors-occur.patch nilfs2-fix-segctor-bug-that-causes-file-system-corruption.patch serial-amba-pl011-use-port-lock-to-guard-control-register-access.patch --- diff --git a/queue-3.4/md-raid10-fix-bug-when-raid10-recovery-fails-to-recover-a-block.patch b/queue-3.4/md-raid10-fix-bug-when-raid10-recovery-fails-to-recover-a-block.patch new file mode 100644 index 00000000000..7d85f178d6b --- /dev/null +++ b/queue-3.4/md-raid10-fix-bug-when-raid10-recovery-fails-to-recover-a-block.patch @@ -0,0 +1,54 @@ +From e8b849158508565e0cd6bc80061124afc5879160 Mon Sep 17 00:00:00 2001 +From: NeilBrown +Date: Mon, 6 Jan 2014 10:35:34 +1100 +Subject: md/raid10: fix bug when raid10 recovery fails to recover a block. + +From: NeilBrown + +commit e8b849158508565e0cd6bc80061124afc5879160 upstream. + +commit e875ecea266a543e643b19e44cf472f1412708f9 + md/raid10 record bad blocks as needed during recovery. + +added code to the "cannot recover this block" path to record a bad +block rather than fail the whole recovery. +Unfortunately this new case was placed *after* r10bio was freed rather +than *before*, yet it still uses r10bio. +This will crash with a null dereference. + +So move the freeing of r10bio down where it is safe. 
+ +Fixes: e875ecea266a543e643b19e44cf472f1412708f9 +Reported-by: Damian Nowak +URL: https://bugzilla.kernel.org/show_bug.cgi?id=68181 +Signed-off-by: NeilBrown +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/raid10.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/drivers/md/raid10.c ++++ b/drivers/md/raid10.c +@@ -2943,10 +2943,6 @@ static sector_t sync_request(struct mdde + if (j == conf->copies) { + /* Cannot recover, so abort the recovery or + * record a bad block */ +- put_buf(r10_bio); +- if (rb2) +- atomic_dec(&rb2->remaining); +- r10_bio = rb2; + if (any_working) { + /* problem is that there are bad blocks + * on other device(s) +@@ -2978,6 +2974,10 @@ static sector_t sync_request(struct mdde + mirror->recovery_disabled + = mddev->recovery_disabled; + } ++ put_buf(r10_bio); ++ if (rb2) ++ atomic_dec(&rb2->remaining); ++ r10_bio = rb2; + break; + } + } diff --git a/queue-3.4/md-raid10-fix-two-bugs-in-handling-of-known-bad-blocks.patch b/queue-3.4/md-raid10-fix-two-bugs-in-handling-of-known-bad-blocks.patch new file mode 100644 index 00000000000..71274080f50 --- /dev/null +++ b/queue-3.4/md-raid10-fix-two-bugs-in-handling-of-known-bad-blocks.patch @@ -0,0 +1,50 @@ +From b50c259e25d9260b9108dc0c2964c26e5ecbe1c1 Mon Sep 17 00:00:00 2001 +From: NeilBrown +Date: Tue, 14 Jan 2014 10:38:09 +1100 +Subject: md/raid10: fix two bugs in handling of known-bad-blocks. + +From: NeilBrown + +commit b50c259e25d9260b9108dc0c2964c26e5ecbe1c1 upstream. + +If we discover a bad block when reading we split the request and +potentially read some of it from a different device. + +The code path of this has two bugs in RAID10. +1/ we get a spin_lock with _irq, but unlock without _irq!! +2/ The calculation of 'sectors_handled' is wrong, as can be clearly + seen by comparison with raid1.c + +This leads to at least 2 warnings and a probable crash if a RAID10 +ever had known bad blocks. 
+ +Fixes: 856e08e23762dfb92ffc68fd0a8d228f9e152160 +Reported-by: Damian Nowak +URL: https://bugzilla.kernel.org/show_bug.cgi?id=68181 +Signed-off-by: NeilBrown +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/raid10.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/md/raid10.c ++++ b/drivers/md/raid10.c +@@ -1117,7 +1117,7 @@ read_again: + /* Could not read all from this device, so we will + * need another r10_bio. + */ +- sectors_handled = (r10_bio->sectors + max_sectors ++ sectors_handled = (r10_bio->sector + max_sectors + - bio->bi_sector); + r10_bio->sectors = max_sectors; + spin_lock_irq(&conf->device_lock); +@@ -1125,7 +1125,7 @@ read_again: + bio->bi_phys_segments = 2; + else + bio->bi_phys_segments++; +- spin_unlock(&conf->device_lock); ++ spin_unlock_irq(&conf->device_lock); + /* Cannot call generic_make_request directly + * as that will be queued in __generic_make_request + * and subsequent mempool_alloc might block diff --git a/queue-3.4/md-raid5-fix-possible-confusion-when-multiple-write-errors-occur.patch b/queue-3.4/md-raid5-fix-possible-confusion-when-multiple-write-errors-occur.patch new file mode 100644 index 00000000000..408b22cc5a2 --- /dev/null +++ b/queue-3.4/md-raid5-fix-possible-confusion-when-multiple-write-errors-occur.patch @@ -0,0 +1,53 @@ +From 1cc03eb93245e63b0b7a7832165efdc52e25b4e6 Mon Sep 17 00:00:00 2001 +From: NeilBrown +Date: Mon, 6 Jan 2014 13:19:42 +1100 +Subject: md/raid5: Fix possible confusion when multiple write errors occur. + +From: NeilBrown + +commit 1cc03eb93245e63b0b7a7832165efdc52e25b4e6 upstream. + +commit 5d8c71f9e5fbdd95650be00294d238e27a363b5c + md: raid5 crash during degradation + +Fixed a crash in an overly simplistic way which could leave +R5_WriteError or R5_MadeGood set in the stripe cache for devices +for which it is no longer relevant. +When those devices are removed and spares added the flags are still +set and can cause incorrect behaviour. 
+ +commit 14a75d3e07c784c004b4b44b34af996b8e4ac453 + md/raid5: preferentially read from replacement device if possible. + +Fixed the same bug in a more effective way, so we can now revert +the original commit. + +Reported-and-tested-by: Alexander Lyakas +Fixes: 5d8c71f9e5fbdd95650be00294d238e27a363b5c +Signed-off-by: NeilBrown +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/raid5.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/md/raid5.c ++++ b/drivers/md/raid5.c +@@ -3238,7 +3238,7 @@ static void analyse_stripe(struct stripe + */ + set_bit(R5_Insync, &dev->flags); + +- if (rdev && test_bit(R5_WriteError, &dev->flags)) { ++ if (test_bit(R5_WriteError, &dev->flags)) { + /* This flag does not apply to '.replacement' + * only to .rdev, so make sure to check that*/ + struct md_rdev *rdev2 = rcu_dereference( +@@ -3251,7 +3251,7 @@ static void analyse_stripe(struct stripe + } else + clear_bit(R5_WriteError, &dev->flags); + } +- if (rdev && test_bit(R5_MadeGood, &dev->flags)) { ++ if (test_bit(R5_MadeGood, &dev->flags)) { + /* This flag does not apply to '.replacement' + * only to .rdev, so make sure to check that*/ + struct md_rdev *rdev2 = rcu_dereference( diff --git a/queue-3.4/nilfs2-fix-segctor-bug-that-causes-file-system-corruption.patch b/queue-3.4/nilfs2-fix-segctor-bug-that-causes-file-system-corruption.patch new file mode 100644 index 00000000000..24fedcd88bb --- /dev/null +++ b/queue-3.4/nilfs2-fix-segctor-bug-that-causes-file-system-corruption.patch @@ -0,0 +1,86 @@ +From 70f2fe3a26248724d8a5019681a869abdaf3e89a Mon Sep 17 00:00:00 2001 +From: Andreas Rohner +Date: Tue, 14 Jan 2014 17:56:36 -0800 +Subject: nilfs2: fix segctor bug that causes file system corruption + +From: Andreas Rohner + +commit 70f2fe3a26248724d8a5019681a869abdaf3e89a upstream. + +There is a bug in the function nilfs_segctor_collect, which results in +active data being written to a segment, that is marked as clean. 
It is +possible, that this segment is selected for a later segment +construction, whereby the old data is overwritten. + +The problem shows itself with the following kernel log message: + + nilfs_sufile_do_cancel_free: segment 6533 must be clean + +Usually a few hours later the file system gets corrupted: + + NILFS: bad btree node (blocknr=8748107): level = 0, flags = 0x0, nchildren = 0 + NILFS error (device sdc1): nilfs_bmap_last_key: broken bmap (inode number=114660) + +The issue can be reproduced with a file system that is nearly full and +with the cleaner running, while some IO intensive task is running. +Although it is quite hard to reproduce. + +This is what happens: + + 1. The cleaner starts the segment construction + 2. nilfs_segctor_collect is called + 3. sc_stage is on NILFS_ST_SUFILE and segments are freed + 4. sc_stage is on NILFS_ST_DAT current segment is full + 5. nilfs_segctor_extend_segments is called, which + allocates a new segment + 6. The new segment is one of the segments freed in step 3 + 7. nilfs_sufile_cancel_freev is called and produces an error message + 8. Loop around and the collection starts again + 9. sc_stage is on NILFS_ST_SUFILE and segments are freed + including the newly allocated segment, which will contain active + data and can be allocated at a later time +10. A few hours later another segment construction allocates the + segment and causes file system corruption + +This can be prevented by simply reordering the statements. If +nilfs_sufile_cancel_freev is called before nilfs_segctor_extend_segments +the freed segments are marked as dirty and cannot be allocated any more. 
+ +Signed-off-by: Andreas Rohner +Reviewed-by: Ryusuke Konishi +Tested-by: Andreas Rohner +Signed-off-by: Ryusuke Konishi +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + fs/nilfs2/segment.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/fs/nilfs2/segment.c ++++ b/fs/nilfs2/segment.c +@@ -1436,17 +1436,19 @@ static int nilfs_segctor_collect(struct + + nilfs_clear_logs(&sci->sc_segbufs); + +- err = nilfs_segctor_extend_segments(sci, nilfs, nadd); +- if (unlikely(err)) +- return err; +- + if (sci->sc_stage.flags & NILFS_CF_SUFREED) { + err = nilfs_sufile_cancel_freev(nilfs->ns_sufile, + sci->sc_freesegs, + sci->sc_nfreesegs, + NULL); + WARN_ON(err); /* do not happen */ ++ sci->sc_stage.flags &= ~NILFS_CF_SUFREED; + } ++ ++ err = nilfs_segctor_extend_segments(sci, nilfs, nadd); ++ if (unlikely(err)) ++ return err; ++ + nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA); + sci->sc_stage = prev_stage; + } diff --git a/queue-3.4/serial-amba-pl011-use-port-lock-to-guard-control-register-access.patch b/queue-3.4/serial-amba-pl011-use-port-lock-to-guard-control-register-access.patch new file mode 100644 index 00000000000..98694513f16 --- /dev/null +++ b/queue-3.4/serial-amba-pl011-use-port-lock-to-guard-control-register-access.patch @@ -0,0 +1,71 @@ +From fe43390702a1b5741fdf217063b05c7612b38303 Mon Sep 17 00:00:00 2001 +From: Jon Medhurst +Date: Tue, 10 Dec 2013 10:18:58 +0000 +Subject: serial: amba-pl011: use port lock to guard control register access + +From: Jon Medhurst + +commit fe43390702a1b5741fdf217063b05c7612b38303 upstream. + +When the pl011 is being used for a console, pl011_console_write forces +the control register (CR) to enable the UART for transmission and then +restores this to the original value afterwards. It does this while +holding the port lock. 
+ +Unfortunately, when the uart is started or shutdown - say in response to +userland using the serial device for a terminal - then this updates the +control register without any locking. + +This means we can have + + pl011_console_write Save CR + pl011_startup Initialise CR, e.g. enable receive + pl011_console_write Restore old CR with receive not enabled + +the result is a serial port which doesn't respond to any input. + +A similar race in reverse could happen when the device is shutdown. + +We can fix these problems by taking the port lock when updating CR. + +Signed-off-by: Jon Medhurst +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/tty/serial/amba-pl011.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/tty/serial/amba-pl011.c ++++ b/drivers/tty/serial/amba-pl011.c +@@ -1413,6 +1413,8 @@ static int pl011_startup(struct uart_por + /* + * Provoke TX FIFO interrupt into asserting. + */ ++ spin_lock_irq(&uap->port.lock); ++ + cr = UART01x_CR_UARTEN | UART011_CR_TXE | UART011_CR_LBE; + writew(cr, uap->port.membase + UART011_CR); + writew(0, uap->port.membase + UART011_FBRD); +@@ -1437,6 +1439,8 @@ static int pl011_startup(struct uart_por + cr |= UART01x_CR_UARTEN | UART011_CR_RXE | UART011_CR_TXE; + writew(cr, uap->port.membase + UART011_CR); + ++ spin_unlock_irq(&uap->port.lock); ++ + /* + * initialise the old status of the modem signals + */ +@@ -1516,11 +1520,13 @@ static void pl011_shutdown(struct uart_p + * it during startup(). 
+ */ + uap->autorts = false; ++ spin_lock_irq(&uap->port.lock); + cr = readw(uap->port.membase + UART011_CR); + uap->old_cr = cr; + cr &= UART011_CR_RTS | UART011_CR_DTR; + cr |= UART01x_CR_UARTEN | UART011_CR_TXE; + writew(cr, uap->port.membase + UART011_CR); ++ spin_unlock_irq(&uap->port.lock); + + /* + * disable break condition and fifos diff --git a/queue-3.4/series b/queue-3.4/series index 7f369a42595..a8bc9484078 100644 --- a/queue-3.4/series +++ b/queue-3.4/series @@ -4,3 +4,8 @@ perf-x86-amd-ibs-fix-waking-up-from-s3-for-amd-family-10h.patch mm-memory-failure.c-recheck-pagehuge-after-hugetlb-page-migrate-successfully.patch hwmon-coretemp-fix-truncated-name-of-alarm-attributes.patch selinux-fix-possible-null-pointer-dereference-in-selinux_inode_permission.patch +nilfs2-fix-segctor-bug-that-causes-file-system-corruption.patch +md-raid10-fix-bug-when-raid10-recovery-fails-to-recover-a-block.patch +md-raid10-fix-two-bugs-in-handling-of-known-bad-blocks.patch +md-raid5-fix-possible-confusion-when-multiple-write-errors-occur.patch +serial-amba-pl011-use-port-lock-to-guard-control-register-access.patch