From 666dcf07f1c6244dc07aa7190b35514c105390ad Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 2 Jul 2014 16:20:15 -0700 Subject: [PATCH] 3.14-stable patches added patches: kernel-watchdog.c-remove-preemption-restrictions-when-restarting-lockup-detector.patch ubifs-fix-an-mmap-and-fsync-race-condition.patch ubifs-remove-incorrect-assertion-in-shrink_tnc.patch watchdog-ath79_wdt-avoid-spurious-restarts-on-ar934x.patch watchdog-kempld-wdt-use-the-correct-value-when-configuring-the-prescaler-with-the-watchdog.patch watchdog-sp805-set-watchdog_device-timeout-from-set_timeout.patch --- ...ions-when-restarting-lockup-detector.patch | 75 +++++++++++++ queue-3.14/series | 6 + ...fix-an-mmap-and-fsync-race-condition.patch | 63 +++++++++++ ...ve-incorrect-assertion-in-shrink_tnc.patch | 104 ++++++++++++++++++ ...dt-avoid-spurious-restarts-on-ar934x.patch | 55 +++++++++ ...ring-the-prescaler-with-the-watchdog.patch | 34 ++++++ ...hdog_device-timeout-from-set_timeout.patch | 50 +++++++++ 7 files changed, 387 insertions(+) create mode 100644 queue-3.14/kernel-watchdog.c-remove-preemption-restrictions-when-restarting-lockup-detector.patch create mode 100644 queue-3.14/ubifs-fix-an-mmap-and-fsync-race-condition.patch create mode 100644 queue-3.14/ubifs-remove-incorrect-assertion-in-shrink_tnc.patch create mode 100644 queue-3.14/watchdog-ath79_wdt-avoid-spurious-restarts-on-ar934x.patch create mode 100644 queue-3.14/watchdog-kempld-wdt-use-the-correct-value-when-configuring-the-prescaler-with-the-watchdog.patch create mode 100644 queue-3.14/watchdog-sp805-set-watchdog_device-timeout-from-set_timeout.patch diff --git a/queue-3.14/kernel-watchdog.c-remove-preemption-restrictions-when-restarting-lockup-detector.patch b/queue-3.14/kernel-watchdog.c-remove-preemption-restrictions-when-restarting-lockup-detector.patch new file mode 100644 index 00000000000..6df94696976 --- /dev/null +++ 
b/queue-3.14/kernel-watchdog.c-remove-preemption-restrictions-when-restarting-lockup-detector.patch @@ -0,0 +1,75 @@ +From bde92cf455a03a91badb7046855592d8c008e929 Mon Sep 17 00:00:00 2001 +From: Don Zickus +Date: Mon, 23 Jun 2014 13:22:03 -0700 +Subject: kernel/watchdog.c: remove preemption restrictions when restarting lockup detector + +From: Don Zickus + +commit bde92cf455a03a91badb7046855592d8c008e929 upstream. + +Peter Wu noticed the following splat on his machine when updating +/proc/sys/kernel/watchdog_thresh: + + BUG: sleeping function called from invalid context at mm/slub.c:965 + in_atomic(): 1, irqs_disabled(): 0, pid: 1, name: init + 3 locks held by init/1: + #0: (sb_writers#3){.+.+.+}, at: [] vfs_write+0x143/0x180 + #1: (watchdog_proc_mutex){+.+.+.}, at: [] proc_dowatchdog+0x33/0x110 + #2: (cpu_hotplug.lock){.+.+.+}, at: [] get_online_cpus+0x32/0x80 + Preemption disabled at:[] proc_dowatchdog+0xe4/0x110 + + CPU: 0 PID: 1 Comm: init Not tainted 3.16.0-rc1-testing #34 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 + Call Trace: + dump_stack+0x4e/0x7a + __might_sleep+0x11d/0x190 + kmem_cache_alloc_trace+0x4e/0x1e0 + perf_event_alloc+0x55/0x440 + perf_event_create_kernel_counter+0x26/0xe0 + watchdog_nmi_enable+0x75/0x140 + update_timers_all_cpus+0x53/0xa0 + proc_dowatchdog+0xe4/0x110 + proc_sys_call_handler+0xb3/0xc0 + proc_sys_write+0x14/0x20 + vfs_write+0xad/0x180 + SyS_write+0x49/0xb0 + system_call_fastpath+0x16/0x1b + NMI watchdog: disabled (cpu0): hardware events not enabled + +What happened is after updating the watchdog_thresh, the lockup detector +is restarted to utilize the new value. Part of this process involved +disabling preemption. Once preemption was disabled, perf tried to +allocate a new event (as part of the restart). This caused the above +BUG_ON as you can't sleep with preemption disabled. 
+ +The preemption restriction seemed aggressive as we are not doing anything +on that particular cpu, but with all the online cpus (which are +protected by the get_online_cpus lock). Remove the restriction and the +BUG_ON goes away. + +Signed-off-by: Don Zickus +Acked-by: Michal Hocko +Reported-by: Peter Wu +Tested-by: Peter Wu +Acked-by: David Rientjes +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/watchdog.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/kernel/watchdog.c ++++ b/kernel/watchdog.c +@@ -524,10 +524,8 @@ static void update_timers_all_cpus(void) + int cpu; + + get_online_cpus(); +- preempt_disable(); + for_each_online_cpu(cpu) + update_timers(cpu); +- preempt_enable(); + put_online_cpus(); + } + diff --git a/queue-3.14/series b/queue-3.14/series index ca682185f65..be0203321b5 100644 --- a/queue-3.14/series +++ b/queue-3.14/series @@ -13,3 +13,9 @@ mtip32xx-increase-timeout-for-standby-immediate-command.patch mtip32xx-remove-dfs_parent-after-pci-unregister.patch recordmcount-mips-fix-possible-incorrect-mcount_loc-table-entries-in-modules.patch mips-msc-prevent-out-of-bounds-writes-to-mips-sc-ioremap-d-region.patch +ubifs-fix-an-mmap-and-fsync-race-condition.patch +ubifs-remove-incorrect-assertion-in-shrink_tnc.patch +watchdog-sp805-set-watchdog_device-timeout-from-set_timeout.patch +watchdog-ath79_wdt-avoid-spurious-restarts-on-ar934x.patch +watchdog-kempld-wdt-use-the-correct-value-when-configuring-the-prescaler-with-the-watchdog.patch +kernel-watchdog.c-remove-preemption-restrictions-when-restarting-lockup-detector.patch diff --git a/queue-3.14/ubifs-fix-an-mmap-and-fsync-race-condition.patch b/queue-3.14/ubifs-fix-an-mmap-and-fsync-race-condition.patch new file mode 100644 index 00000000000..909d2c7004a --- /dev/null +++ b/queue-3.14/ubifs-fix-an-mmap-and-fsync-race-condition.patch @@ -0,0 +1,63 @@ +From 691a7c6f28ac90cccd0dbcf81348ea90b211bdd0 Mon Sep 17 00:00:00 2001 +From: 
hujianyang +Date: Wed, 30 Apr 2014 14:06:06 +0800 +Subject: UBIFS: fix an mmap and fsync race condition + +From: hujianyang + +commit 691a7c6f28ac90cccd0dbcf81348ea90b211bdd0 upstream. + +There is a race condition in UBIFS: + +Thread A (mmap) Thread B (fsync) + +->__do_fault ->write_cache_pages + -> ubifs_vm_page_mkwrite + -> budget_space + -> lock_page + -> release/convert_page_budget + -> SetPagePrivate + -> TestSetPageDirty + -> unlock_page + -> lock_page + -> TestClearPageDirty + -> ubifs_writepage + -> do_writepage + -> release_budget + -> ClearPagePrivate + -> unlock_page + -> !(ret & VM_FAULT_LOCKED) + -> lock_page + -> set_page_dirty + -> ubifs_set_page_dirty + -> TestSetPageDirty (set page dirty without budgeting) + -> unlock_page + +This leads to a situation where we have a dirty page but no budget allocated for +this page, so further write-back may fail with -ENOSPC. + +In this fix we return from page_mkwrite without performing unlock_page. We +return VM_FAULT_LOCKED instead. After doing this, the race above will not +happen. 
+ +Signed-off-by: hujianyang +Tested-by: Laurence Withers +Signed-off-by: Artem Bityutskiy +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ubifs/file.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/fs/ubifs/file.c ++++ b/fs/ubifs/file.c +@@ -1525,8 +1525,7 @@ static int ubifs_vm_page_mkwrite(struct + } + + wait_for_stable_page(page); +- unlock_page(page); +- return 0; ++ return VM_FAULT_LOCKED; + + out_unlock: + unlock_page(page); diff --git a/queue-3.14/ubifs-remove-incorrect-assertion-in-shrink_tnc.patch b/queue-3.14/ubifs-remove-incorrect-assertion-in-shrink_tnc.patch new file mode 100644 index 00000000000..a87039b5d7e --- /dev/null +++ b/queue-3.14/ubifs-remove-incorrect-assertion-in-shrink_tnc.patch @@ -0,0 +1,104 @@ +From 72abc8f4b4e8574318189886de627a2bfe6cd0da Mon Sep 17 00:00:00 2001 +From: hujianyang +Date: Sat, 31 May 2014 11:39:32 +0800 +Subject: UBIFS: Remove incorrect assertion in shrink_tnc() + +From: hujianyang + +commit 72abc8f4b4e8574318189886de627a2bfe6cd0da upstream. 
+ +I hit the same assert failed as Dolev Raviv reported in Kernel v3.10 +shows like this: + +[ 9641.164028] UBIFS assert failed in shrink_tnc at 131 (pid 13297) +[ 9641.234078] CPU: 1 PID: 13297 Comm: mmap.test Tainted: G O 3.10.40 #1 +[ 9641.234116] [] (unwind_backtrace+0x0/0x12c) from [] (show_stack+0x20/0x24) +[ 9641.234137] [] (show_stack+0x20/0x24) from [] (dump_stack+0x20/0x28) +[ 9641.234188] [] (dump_stack+0x20/0x28) from [] (shrink_tnc_trees+0x25c/0x350 [ubifs]) +[ 9641.234265] [] (shrink_tnc_trees+0x25c/0x350 [ubifs]) from [] (ubifs_shrinker+0x25c/0x310 [ubifs]) +[ 9641.234307] [] (ubifs_shrinker+0x25c/0x310 [ubifs]) from [] (shrink_slab+0x1d4/0x2f8) +[ 9641.234327] [] (shrink_slab+0x1d4/0x2f8) from [] (do_try_to_free_pages+0x300/0x544) +[ 9641.234344] [] (do_try_to_free_pages+0x300/0x544) from [] (try_to_free_pages+0x2d0/0x398) +[ 9641.234363] [] (try_to_free_pages+0x2d0/0x398) from [] (__alloc_pages_nodemask+0x494/0x7e8) +[ 9641.234382] [] (__alloc_pages_nodemask+0x494/0x7e8) from [] (new_slab+0x78/0x238) +[ 9641.234400] [] (new_slab+0x78/0x238) from [] (__slab_alloc.constprop.42+0x1a4/0x50c) +[ 9641.234419] [] (__slab_alloc.constprop.42+0x1a4/0x50c) from [] (kmem_cache_alloc_trace+0x54/0x188) +[ 9641.234459] [] (kmem_cache_alloc_trace+0x54/0x188) from [] (do_readpage+0x168/0x468 [ubifs]) +[ 9641.234553] [] (do_readpage+0x168/0x468 [ubifs]) from [] (ubifs_readpage+0x424/0x464 [ubifs]) +[ 9641.234606] [] (ubifs_readpage+0x424/0x464 [ubifs]) from [] (filemap_fault+0x304/0x418) +[ 9641.234638] [] (filemap_fault+0x304/0x418) from [] (__do_fault+0xd4/0x530) +[ 9641.234665] [] (__do_fault+0xd4/0x530) from [] (handle_pte_fault+0x480/0xf54) +[ 9641.234690] [] (handle_pte_fault+0x480/0xf54) from [] (handle_mm_fault+0x140/0x184) +[ 9641.234716] [] (handle_mm_fault+0x140/0x184) from [] (do_page_fault+0x150/0x3ac) +[ 9641.234737] [] (do_page_fault+0x150/0x3ac) from [] (do_DataAbort+0x3c/0xa0) +[ 9641.234759] [] (do_DataAbort+0x3c/0xa0) from [] 
(__dabt_usr+0x38/0x40) + +After analyzing the code, I found a condition that may cause this failed +in correct operations. Thus, I think this assertion is wrong and should be +removed. + +Suppose there are two clean znodes and one dirty znode in TNC. So the +per-filesystem atomic_t @clean_zn_cnt is (2). If commit start, dirty_znode +is set to COW_ZNODE in get_znodes_to_commit() in case of potentially ops +on this znode. We clear COW bit and DIRTY bit in write_index() without +@tnc_mutex locked. We don't increase @clean_zn_cnt in this place. As the +comments in write_index() shows, if another process hold @tnc_mutex and +dirty this znode after we clean it, @clean_zn_cnt would be decreased to (1). +We will increase @clean_zn_cnt to (2) with @tnc_mutex locked in +free_obsolete_znodes() to keep it right. + +If shrink_tnc() performs between decrease and increase, it will release +other 2 clean znodes it holds and found @clean_zn_cnt is less than zero +(1 - 2 = -1), then hit the assertion. Because free_obsolete_znodes() will +soon correct @clean_zn_cnt and no harm to fs in this case, I think this +assertion could be removed. 
+ +2 clean znodes and 1 dirty znode, @clean_zn_cnt == 2 + +Thread A (commit) Thread B (write or others) Thread C (shrinker) +->write_index + ->clear_bit(DIRTY_NODE) + ->clear_bit(COW_ZNODE) + + @clean_zn_cnt == 2 + ->mutex_locked(&tnc_mutex) + ->dirty_cow_znode + ->!ubifs_zn_cow(znode) + ->!test_and_set_bit(DIRTY_NODE) + ->atomic_dec(&clean_zn_cnt) + ->mutex_unlocked(&tnc_mutex) + + @clean_zn_cnt == 1 + ->mutex_locked(&tnc_mutex) + ->shrink_tnc + ->destroy_tnc_subtree + ->atomic_sub(&clean_zn_cnt, 2) + ->ubifs_assert <- hit + ->mutex_unlocked(&tnc_mutex) + + @clean_zn_cnt == -1 +->mutex_lock(&tnc_mutex) +->free_obsolete_znodes + ->atomic_inc(&clean_zn_cnt) +->mutex_unlock(&tnc_mutex) + + @clean_zn_cnt == 0 (correct after shrink) + +Signed-off-by: hujianyang +Signed-off-by: Artem Bityutskiy +Signed-off-by: Greg Kroah-Hartman + +--- + fs/ubifs/shrinker.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/fs/ubifs/shrinker.c ++++ b/fs/ubifs/shrinker.c +@@ -128,7 +128,6 @@ static int shrink_tnc(struct ubifs_info + freed = ubifs_destroy_tnc_subtree(znode); + atomic_long_sub(freed, &ubifs_clean_zn_cnt); + atomic_long_sub(freed, &c->clean_zn_cnt); +- ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0); + total_freed += freed; + znode = zprev; + } diff --git a/queue-3.14/watchdog-ath79_wdt-avoid-spurious-restarts-on-ar934x.patch b/queue-3.14/watchdog-ath79_wdt-avoid-spurious-restarts-on-ar934x.patch new file mode 100644 index 00000000000..84675053a09 --- /dev/null +++ b/queue-3.14/watchdog-ath79_wdt-avoid-spurious-restarts-on-ar934x.patch @@ -0,0 +1,55 @@ +From 23afeb613ec0e10aecfae7838a14d485db62ac52 Mon Sep 17 00:00:00 2001 +From: Gabor Juhos +Date: Wed, 16 Apr 2014 11:34:41 +0200 +Subject: watchdog: ath79_wdt: avoid spurious restarts on AR934x + +From: Gabor Juhos + +commit 23afeb613ec0e10aecfae7838a14d485db62ac52 upstream. 
+ +On some AR934x based systems, where the frequency of +the AHB bus is relatively high, the built-in watchdog +causes a spurious restart when it gets enabled. + +The possible cause of these restarts is that the timeout +value written into the TIMER register does not reach +the hardware in time. + +Add an explicit delay into the ath79_wdt_enable function +to avoid the spurious restarts. + +Signed-off-by: Gabor Juhos +Reviewed-by: Guenter Roeck +Signed-off-by: Wim Van Sebroeck +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/watchdog/ath79_wdt.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/drivers/watchdog/ath79_wdt.c ++++ b/drivers/watchdog/ath79_wdt.c +@@ -20,6 +20,7 @@ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + + #include ++#include + #include + #include + #include +@@ -91,6 +92,15 @@ static inline void ath79_wdt_keepalive(v + static inline void ath79_wdt_enable(void) + { + ath79_wdt_keepalive(); ++ ++ /* ++ * Updating the TIMER register requires a few microseconds ++ * on the AR934x SoCs at least. Use a small delay to ensure ++ * that the TIMER register is updated within the hardware ++ * before enabling the watchdog. 
++ */ ++ udelay(2); ++ + ath79_wdt_wr(WDOG_REG_CTRL, WDOG_CTRL_ACTION_FCR); + /* flush write */ + ath79_wdt_rr(WDOG_REG_CTRL); diff --git a/queue-3.14/watchdog-kempld-wdt-use-the-correct-value-when-configuring-the-prescaler-with-the-watchdog.patch b/queue-3.14/watchdog-kempld-wdt-use-the-correct-value-when-configuring-the-prescaler-with-the-watchdog.patch new file mode 100644 index 00000000000..071831cf95a --- /dev/null +++ b/queue-3.14/watchdog-kempld-wdt-use-the-correct-value-when-configuring-the-prescaler-with-the-watchdog.patch @@ -0,0 +1,34 @@ +From a9e0436b303e94ba57d3bd4b1fcbeaa744b7ebeb Mon Sep 17 00:00:00 2001 +From: gundberg +Date: Thu, 24 Apr 2014 15:49:19 +0200 +Subject: watchdog: kempld-wdt: Use the correct value when configuring the prescaler with the watchdog + +From: gundberg + +commit a9e0436b303e94ba57d3bd4b1fcbeaa744b7ebeb upstream. + +Use the prescaler index, rather than its value, to configure the watchdog. +This will prevent a mismatch with the prescaler used to calculate the cycles. 
+ +Signed-off-by: Per Gundberg +Reviewed-by: Guenter Roeck +Reviewed-by: Michael Brunner +Tested-by: Michael Brunner +Signed-off-by: Wim Van Sebroeck +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/watchdog/kempld_wdt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/watchdog/kempld_wdt.c ++++ b/drivers/watchdog/kempld_wdt.c +@@ -162,7 +162,7 @@ static int kempld_wdt_set_stage_timeout( + kempld_get_mutex(pld); + stage_cfg = kempld_read8(pld, KEMPLD_WDT_STAGE_CFG(stage->id)); + stage_cfg &= ~STAGE_CFG_PRESCALER_MASK; +- stage_cfg |= STAGE_CFG_SET_PRESCALER(prescaler); ++ stage_cfg |= STAGE_CFG_SET_PRESCALER(PRESCALER_21); + kempld_write8(pld, KEMPLD_WDT_STAGE_CFG(stage->id), stage_cfg); + kempld_write32(pld, KEMPLD_WDT_STAGE_TIMEOUT(stage->id), + stage_timeout); diff --git a/queue-3.14/watchdog-sp805-set-watchdog_device-timeout-from-set_timeout.patch b/queue-3.14/watchdog-sp805-set-watchdog_device-timeout-from-set_timeout.patch new file mode 100644 index 00000000000..d44414fe773 --- /dev/null +++ b/queue-3.14/watchdog-sp805-set-watchdog_device-timeout-from-set_timeout.patch @@ -0,0 +1,50 @@ +From 938626d96a3ffb9eb54552bb0d3a4f2b30ffdeb0 Mon Sep 17 00:00:00 2001 +From: Viresh Kumar +Date: Thu, 15 May 2014 10:01:59 +0530 +Subject: watchdog: sp805: Set watchdog_device->timeout from ->set_timeout() + +From: Viresh Kumar + +commit 938626d96a3ffb9eb54552bb0d3a4f2b30ffdeb0 upstream. + +Implementation of ->set_timeout() is supposed to set 'timeout' field of 'struct +watchdog_device' passed to it. sp805 was rather setting this in a local +variable. Fix it. 
+ +Reported-by: Arun Ramamurthy +Signed-off-by: Viresh Kumar +Reviewed-by: Guenter Roeck +Signed-off-by: Wim Van Sebroeck +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/watchdog/sp805_wdt.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/drivers/watchdog/sp805_wdt.c ++++ b/drivers/watchdog/sp805_wdt.c +@@ -60,7 +60,6 @@ + * @adev: amba device structure of wdt + * @status: current status of wdt + * @load_val: load value to be set for current timeout +- * @timeout: current programmed timeout + */ + struct sp805_wdt { + struct watchdog_device wdd; +@@ -69,7 +68,6 @@ struct sp805_wdt { + struct clk *clk; + struct amba_device *adev; + unsigned int load_val; +- unsigned int timeout; + }; + + static bool nowayout = WATCHDOG_NOWAYOUT; +@@ -99,7 +97,7 @@ static int wdt_setload(struct watchdog_d + spin_lock(&wdt->lock); + wdt->load_val = load; + /* roundup timeout to closest positive integer value */ +- wdt->timeout = div_u64((load + 1) * 2 + (rate / 2), rate); ++ wdd->timeout = div_u64((load + 1) * 2 + (rate / 2), rate); + spin_unlock(&wdt->lock); + + return 0; -- 2.47.3