From: Greg Kroah-Hartman Date: Wed, 29 Apr 2015 12:10:36 +0000 (+0200) Subject: 3.10-stable patches X-Git-Tag: v3.10.77~63 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=75d10a18c1b26616f708a94417b462d702d2df91;p=thirdparty%2Fkernel%2Fstable-queue.git 3.10-stable patches added patches: btrfs-fix-inode-eviction-infinite-loop-after-cloning-into-it.patch btrfs-fix-log-tree-corruption-when-fs-mounted-with-o-discard.patch sched-idle-x86-restore-mwait_idle-to-fix-boot-hangs-to-improve-power-savings-and-to-improve-performance.patch --- diff --git a/queue-3.10/btrfs-fix-inode-eviction-infinite-loop-after-cloning-into-it.patch b/queue-3.10/btrfs-fix-inode-eviction-infinite-loop-after-cloning-into-it.patch new file mode 100644 index 00000000000..832f730f472 --- /dev/null +++ b/queue-3.10/btrfs-fix-inode-eviction-infinite-loop-after-cloning-into-it.patch @@ -0,0 +1,103 @@ +From ccccf3d67294714af2d72a6fd6fd7d73b01c9329 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 30 Mar 2015 18:23:59 +0100 +Subject: Btrfs: fix inode eviction infinite loop after cloning into it + +From: Filipe Manana + +commit ccccf3d67294714af2d72a6fd6fd7d73b01c9329 upstream. + +If we attempt to clone a 0 length region into a file we can end up +inserting a range in the inode's extent_io tree with a start offset +that is greater than the end offset, which immediately triggers the +following warning: + +[ 3914.619057] WARNING: CPU: 17 PID: 4199 at fs/btrfs/extent_io.c:435 insert_state+0x4b/0x10b [btrfs]() +[ 3914.620886] BTRFS: end < start 4095 4096 +(...) +[ 3914.638093] Call Trace: +[ 3914.638636] [] dump_stack+0x4c/0x65 +[ 3914.639620] [] warn_slowpath_common+0xa1/0xbb +[ 3914.640789] [] ? insert_state+0x4b/0x10b [btrfs] +[ 3914.642041] [] warn_slowpath_fmt+0x46/0x48 +[ 3914.643236] [] insert_state+0x4b/0x10b [btrfs] +[ 3914.644441] [] __set_extent_bit+0x107/0x3f4 [btrfs] +[ 3914.645711] [] lock_extent_bits+0x65/0x1bf [btrfs] +[ 3914.646914] [] ? 
_raw_spin_unlock+0x28/0x33 +[ 3914.648058] [] ? test_range_bit+0xcc/0xde [btrfs] +[ 3914.650105] [] lock_extent+0x13/0x15 [btrfs] +[ 3914.651361] [] lock_extent_range+0x3d/0xcd [btrfs] +[ 3914.652761] [] btrfs_ioctl_clone+0x278/0x388 [btrfs] +[ 3914.654128] [] ? might_fault+0x58/0xb5 +[ 3914.655320] [] btrfs_ioctl+0xb51/0x2195 [btrfs] +(...) +[ 3914.669271] ---[ end trace 14843d3e2e622fc1 ]--- + +This later makes the inode eviction handler enter an infinite loop that +keeps dumping the following warning over and over: + +[ 3915.117629] WARNING: CPU: 22 PID: 4228 at fs/btrfs/extent_io.c:435 insert_state+0x4b/0x10b [btrfs]() +[ 3915.119913] BTRFS: end < start 4095 4096 +(...) +[ 3915.137394] Call Trace: +[ 3915.137913] [] dump_stack+0x4c/0x65 +[ 3915.139154] [] warn_slowpath_common+0xa1/0xbb +[ 3915.140316] [] ? insert_state+0x4b/0x10b [btrfs] +[ 3915.141505] [] warn_slowpath_fmt+0x46/0x48 +[ 3915.142709] [] insert_state+0x4b/0x10b [btrfs] +[ 3915.143849] [] __set_extent_bit+0x107/0x3f4 [btrfs] +[ 3915.145120] [] ? btrfs_kill_super+0x17/0x23 [btrfs] +[ 3915.146352] [] ? deactivate_locked_super+0x3b/0x50 +[ 3915.147565] [] lock_extent_bits+0x65/0x1bf [btrfs] +[ 3915.148785] [] ? _raw_write_unlock+0x28/0x33 +[ 3915.149931] [] btrfs_evict_inode+0x196/0x482 [btrfs] +[ 3915.151154] [] evict+0xa0/0x148 +[ 3915.152094] [] dispose_list+0x39/0x43 +[ 3915.153081] [] evict_inodes+0xdc/0xeb +[ 3915.154062] [] generic_shutdown_super+0x49/0xef +[ 3915.155193] [] kill_anon_super+0x13/0x1e +[ 3915.156274] [] btrfs_kill_super+0x17/0x23 [btrfs] +(...) +[ 3915.167404] ---[ end trace 14843d3e2e622fc2 ]--- + +So just bail out of the clone ioctl if the length of the region to clone +is zero, without locking any extent range, in order to prevent this issue +(same behaviour as a pwrite with a 0 length for example). + +This is trivial to reproduce. 
For example, the steps for the test I just +made for fstests: + + mkfs.btrfs -f SCRATCH_DEV + mount SCRATCH_DEV $SCRATCH_MNT + + touch $SCRATCH_MNT/foo + touch $SCRATCH_MNT/bar + + $CLONER_PROG -s 0 -d 4096 -l 0 $SCRATCH_MNT/foo $SCRATCH_MNT/bar + umount $SCRATCH_MNT + +A test case for fstests follows soon. + +Signed-off-by: Filipe Manana +Reviewed-by: Omar Sandoval +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/ioctl.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -2572,6 +2572,11 @@ static noinline long btrfs_ioctl_clone(s + if (off + len == src->i_size) + len = ALIGN(src->i_size, bs) - off; + ++ if (len == 0) { ++ ret = 0; ++ goto out_unlock; ++ } ++ + /* verify the end result is block aligned */ + if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || + !IS_ALIGNED(destoff, bs)) diff --git a/queue-3.10/btrfs-fix-log-tree-corruption-when-fs-mounted-with-o-discard.patch b/queue-3.10/btrfs-fix-log-tree-corruption-when-fs-mounted-with-o-discard.patch new file mode 100644 index 00000000000..e0f232182da --- /dev/null +++ b/queue-3.10/btrfs-fix-log-tree-corruption-when-fs-mounted-with-o-discard.patch @@ -0,0 +1,57 @@ +From dcc82f4783ad91d4ab654f89f37ae9291cdc846a Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 23 Mar 2015 14:07:40 +0000 +Subject: Btrfs: fix log tree corruption when fs mounted with -o discard + +From: Filipe Manana + +commit dcc82f4783ad91d4ab654f89f37ae9291cdc846a upstream. + +While committing a transaction we free the log roots before we write the +new super block. Freeing the log roots implies marking the disk location +of every node/leaf (metadata extent) as pinned before the new super block +is written. 
This is to prevent the disk location of log metadata extents +from being reused before the new super block is written, otherwise we +would have a corrupted log tree if before the new super block is written +a crash/reboot happens and the location of any log tree metadata extent +ended up being reused and rewritten. + +Even though we pinned the log tree's metadata extents, we were issuing a +discard against them if the fs was mounted with the -o discard option, +resulting in corruption of the log tree if a crash/reboot happened before +writing the new super block - the next time the fs was mounted, during +the log replay process we would find nodes/leaves of the log btree with +a content full of zeroes, causing the process to fail and require the +use of the tool btrfs-zero-log to wipe out the log tree (and all data +previously fsynced becoming lost forever). + +Fix this by not doing a discard when pinning an extent. The discard will +be done later when it's safe (after the new super block is committed) at +extent-tree.c:btrfs_finish_extent_commit(). 
+ +Fixes: e688b7252f78 (Btrfs: fix extent pinning bugs in the tree log) +Signed-off-by: Filipe Manana +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/extent-tree.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -6363,12 +6363,11 @@ static int __btrfs_free_reserved_extent( + return -ENOSPC; + } + +- if (btrfs_test_opt(root, DISCARD)) +- ret = btrfs_discard_extent(root, start, len, NULL); +- + if (pin) + pin_down_extent(root, cache, start, len, 1); + else { ++ if (btrfs_test_opt(root, DISCARD)) ++ ret = btrfs_discard_extent(root, start, len, NULL); + btrfs_add_free_space(cache, start, len); + btrfs_update_reserved_bytes(cache, len, RESERVE_FREE); + } diff --git a/queue-3.10/sched-idle-x86-restore-mwait_idle-to-fix-boot-hangs-to-improve-power-savings-and-to-improve-performance.patch b/queue-3.10/sched-idle-x86-restore-mwait_idle-to-fix-boot-hangs-to-improve-power-savings-and-to-improve-performance.patch new file mode 100644 index 00000000000..d074721e240 --- /dev/null +++ b/queue-3.10/sched-idle-x86-restore-mwait_idle-to-fix-boot-hangs-to-improve-power-savings-and-to-improve-performance.patch @@ -0,0 +1,151 @@ +From b253149b843f89cd300cbdbea27ce1f847506f99 Mon Sep 17 00:00:00 2001 +From: Len Brown +Date: Wed, 15 Jan 2014 00:37:34 -0500 +Subject: sched/idle/x86: Restore mwait_idle() to fix boot hangs, to improve power savings and to improve performance + +From: Len Brown + +commit b253149b843f89cd300cbdbea27ce1f847506f99 upstream. + +In Linux-3.9 we removed the mwait_idle() loop: + + 69fb3676df33 ("x86 idle: remove mwait_idle() and "idle=mwait" cmdline param") + +The reasoning was that modern machines should be sufficiently +happy during the boot process using the default_idle() HALT +loop, until cpuidle loads and either acpi_idle or intel_idle +invoke the newer MWAIT-with-hints idle loop. + +But two machines reported problems: + + 1. 
Certain Core2-era machines support MWAIT-C1 and HALT only. + MWAIT-C1 is preferred for optimal power and performance. + But if they support just C1, cpuidle never loads and + so they use the boot-time default idle loop forever. + + 2. Some laptops will boot-hang if HALT is used, + but will boot successfully if MWAIT is used. + This appears to be a hidden assumption in BIOS SMI, + that is presumably valid on the proprietary OS + where the BIOS was validated. + + https://bugzilla.kernel.org/show_bug.cgi?id=60770 + +So here we effectively revert the patch above, restoring +the mwait_idle() loop. However, we don't bother restoring +the idle=mwait cmdline parameter, since it appears to add +no value. + +Maintainer notes: + + For 3.9, simply revert 69fb3676df + for 3.10, patch -F3 applies, fuzz needed due to __cpuinit use in + context For 3.11, 3.12, 3.13, this patch applies cleanly + +Tested-by: Mike Galbraith +Signed-off-by: Len Brown +Acked-by: Mike Galbraith +Cc: Borislav Petkov +Cc: H. Peter Anvin +Cc: Ian Malone +Cc: Josh Boyer +Cc: Linus Torvalds +Cc: Mike Galbraith +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/345254a551eb5a6a866e048d7ab570fd2193aca4.1389763084.git.len.brown@intel.com +[ Ported to recent kernels. ] +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h +index a1410db38a1a..653dfa7662e1 100644 +--- a/arch/x86/include/asm/mwait.h ++++ b/arch/x86/include/asm/mwait.h +@@ -30,6 +30,14 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) + :: "a" (eax), "c" (ecx)); + } + ++static inline void __sti_mwait(unsigned long eax, unsigned long ecx) ++{ ++ trace_hardirqs_on(); ++ /* "mwait %eax, %ecx;" */ ++ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" ++ :: "a" (eax), "c" (ecx)); ++} ++ + /* + * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, + * which can obviate IPI to trigger checking of need_resched. 
+diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c +index e127ddaa2d5a..da06f741d2a6 100644 +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -398,6 +399,49 @@ static void amd_e400_idle(void) + default_idle(); + } + ++/* ++ * Intel Core2 and older machines prefer MWAIT over HALT for C1. ++ * We can't rely on cpuidle installing MWAIT, because it will not load ++ * on systems that support only C1 -- so the boot default must be MWAIT. ++ * ++ * Some AMD machines are the opposite, they depend on using HALT. ++ * ++ * So for default C1, which is used during boot until cpuidle loads, ++ * use MWAIT-C1 on Intel HW that has it, else use HALT. ++ */ ++static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) ++{ ++ if (c->x86_vendor != X86_VENDOR_INTEL) ++ return 0; ++ ++ if (!cpu_has(c, X86_FEATURE_MWAIT)) ++ return 0; ++ ++ return 1; ++} ++ ++/* ++ * MONITOR/MWAIT with no hints, used for the default C1 state. ++ * This invokes MWAIT with interrupts enabled and no flags, ++ * which is backwards compatible with the original MWAIT implementation. 
++ */ ++ ++static void mwait_idle(void) ++{ ++ if (!need_resched()) { ++ if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) ++ clflush((void *)&current_thread_info()->flags); ++ ++ __monitor((void *)&current_thread_info()->flags, 0, 0); ++ smp_mb(); ++ if (!need_resched()) ++ __sti_mwait(0, 0); ++ else ++ local_irq_enable(); ++ } else ++ local_irq_enable(); ++} ++ + void select_idle_routine(const struct cpuinfo_x86 *c) + { + #ifdef CONFIG_SMP +@@ -411,6 +455,9 @@ void select_idle_routine(const struct cpuinfo_x86 *c) + /* E400: APIC timer interrupt does not wake up CPU from C1e */ + pr_info("using AMD E400 aware idle routine\n"); + x86_idle = amd_e400_idle; ++ } else if (prefer_mwait_c1_over_halt(c)) { ++ pr_info("using mwait in idle threads\n"); ++ x86_idle = mwait_idle; + } else + x86_idle = default_idle; + } diff --git a/queue-3.10/series b/queue-3.10/series index c6d3b6a5eac..75f3f14b8e2 100644 --- a/queue-3.10/series +++ b/queue-3.10/series @@ -1,3 +1,6 @@ ip_forward-drop-frames-with-attached-skb-sk.patch tcp-fix-possible-deadlock-in-tcp_send_fin.patch tcp-avoid-looping-in-tcp_send_fin.patch +btrfs-fix-log-tree-corruption-when-fs-mounted-with-o-discard.patch +btrfs-fix-inode-eviction-infinite-loop-after-cloning-into-it.patch +sched-idle-x86-restore-mwait_idle-to-fix-boot-hangs-to-improve-power-savings-and-to-improve-performance.patch