From 6cf04451bd2f13ca4e3b4f9c7f5d89428d19c562 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 29 Apr 2015 14:10:57 +0200 Subject: [PATCH] 3.14-stable patches added patches: btrfs-don-t-accept-bare-namespace-as-a-valid-xattr.patch btrfs-fix-inode-eviction-infinite-loop-after-cloning-into-it.patch btrfs-fix-inode-eviction-infinite-loop-after-extent_same-ioctl.patch btrfs-fix-log-tree-corruption-when-fs-mounted-with-o-discard.patch sched-idle-x86-optimize-unnecessary-mwait_idle-resched-ipis.patch sched-idle-x86-restore-mwait_idle-to-fix-boot-hangs-to-improve-power-savings-and-to-improve-performance.patch --- ...cept-bare-namespace-as-a-valid-xattr.patch | 134 +++++++++++++++ ...-infinite-loop-after-cloning-into-it.patch | 103 ++++++++++++ ...nfinite-loop-after-extent_same-ioctl.patch | 49 ++++++ ...ption-when-fs-mounted-with-o-discard.patch | 57 +++++++ ...-unnecessary-mwait_idle-resched-ipis.patch | 72 +++++++++ ...r-savings-and-to-improve-performance.patch | 152 ++++++++++++++++++ queue-3.14/series | 6 + 7 files changed, 573 insertions(+) create mode 100644 queue-3.14/btrfs-don-t-accept-bare-namespace-as-a-valid-xattr.patch create mode 100644 queue-3.14/btrfs-fix-inode-eviction-infinite-loop-after-cloning-into-it.patch create mode 100644 queue-3.14/btrfs-fix-inode-eviction-infinite-loop-after-extent_same-ioctl.patch create mode 100644 queue-3.14/btrfs-fix-log-tree-corruption-when-fs-mounted-with-o-discard.patch create mode 100644 queue-3.14/sched-idle-x86-optimize-unnecessary-mwait_idle-resched-ipis.patch create mode 100644 queue-3.14/sched-idle-x86-restore-mwait_idle-to-fix-boot-hangs-to-improve-power-savings-and-to-improve-performance.patch diff --git a/queue-3.14/btrfs-don-t-accept-bare-namespace-as-a-valid-xattr.patch b/queue-3.14/btrfs-don-t-accept-bare-namespace-as-a-valid-xattr.patch new file mode 100644 index 00000000000..b5e8d7c5627 --- /dev/null +++ b/queue-3.14/btrfs-don-t-accept-bare-namespace-as-a-valid-xattr.patch @@ -0,0 +1,134 @@ +From 
3c3b04d10ff1811a27f86684ccd2f5ba6983211d Mon Sep 17 00:00:00 2001 +From: David Sterba +Date: Wed, 25 Mar 2015 19:26:41 +0100 +Subject: btrfs: don't accept bare namespace as a valid xattr + +From: David Sterba + +commit 3c3b04d10ff1811a27f86684ccd2f5ba6983211d upstream. + +Due to insufficient check in btrfs_is_valid_xattr, this unexpectedly +works: + + $ touch file + $ setfattr -n user. -v 1 file + $ getfattr -d file +user.="1" + +ie. the missing attribute name after the namespace. + +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=94291 +Reported-by: William Douglas +Signed-off-by: David Sterba +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/xattr.c | 53 +++++++++++++++++++++++++++++++++++++++-------------- + 1 file changed, 39 insertions(+), 14 deletions(-) + +--- a/fs/btrfs/xattr.c ++++ b/fs/btrfs/xattr.c +@@ -324,22 +324,42 @@ const struct xattr_handler *btrfs_xattr_ + /* + * Check if the attribute is in a supported namespace. + * +- * This applied after the check for the synthetic attributes in the system ++ * This is applied after the check for the synthetic attributes in the system + * namespace. 
+ */ +-static bool btrfs_is_valid_xattr(const char *name) ++static int btrfs_is_valid_xattr(const char *name) + { +- return !strncmp(name, XATTR_SECURITY_PREFIX, +- XATTR_SECURITY_PREFIX_LEN) || +- !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) || +- !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || +- !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) || +- !strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN); ++ int len = strlen(name); ++ int prefixlen = 0; ++ ++ if (!strncmp(name, XATTR_SECURITY_PREFIX, ++ XATTR_SECURITY_PREFIX_LEN)) ++ prefixlen = XATTR_SECURITY_PREFIX_LEN; ++ else if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) ++ prefixlen = XATTR_SYSTEM_PREFIX_LEN; ++ else if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) ++ prefixlen = XATTR_TRUSTED_PREFIX_LEN; ++ else if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) ++ prefixlen = XATTR_USER_PREFIX_LEN; ++ else if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN)) ++ prefixlen = XATTR_BTRFS_PREFIX_LEN; ++ else ++ return -EOPNOTSUPP; ++ ++ /* ++ * The name cannot consist of just prefix ++ */ ++ if (len <= prefixlen) ++ return -EINVAL; ++ ++ return 0; + } + + ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size) + { ++ int ret; ++ + /* + * If this is a request for a synthetic attribute in the system.* + * namespace use the generic infrastructure to resolve a handler +@@ -348,8 +368,9 @@ ssize_t btrfs_getxattr(struct dentry *de + if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return generic_getxattr(dentry, name, buffer, size); + +- if (!btrfs_is_valid_xattr(name)) +- return -EOPNOTSUPP; ++ ret = btrfs_is_valid_xattr(name); ++ if (ret) ++ return ret; + return __btrfs_getxattr(dentry->d_inode, name, buffer, size); + } + +@@ -357,6 +378,7 @@ int btrfs_setxattr(struct dentry *dentry + size_t size, int flags) + { + struct btrfs_root *root = 
BTRFS_I(dentry->d_inode)->root; ++ int ret; + + /* + * The permission on security.* and system.* is not checked +@@ -373,8 +395,9 @@ int btrfs_setxattr(struct dentry *dentry + if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return generic_setxattr(dentry, name, value, size, flags); + +- if (!btrfs_is_valid_xattr(name)) +- return -EOPNOTSUPP; ++ ret = btrfs_is_valid_xattr(name); ++ if (ret) ++ return ret; + + if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN)) + return btrfs_set_prop(dentry->d_inode, name, +@@ -390,6 +413,7 @@ int btrfs_setxattr(struct dentry *dentry + int btrfs_removexattr(struct dentry *dentry, const char *name) + { + struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; ++ int ret; + + /* + * The permission on security.* and system.* is not checked +@@ -406,8 +430,9 @@ int btrfs_removexattr(struct dentry *den + if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return generic_removexattr(dentry, name); + +- if (!btrfs_is_valid_xattr(name)) +- return -EOPNOTSUPP; ++ ret = btrfs_is_valid_xattr(name); ++ if (ret) ++ return ret; + + if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN)) + return btrfs_set_prop(dentry->d_inode, name, diff --git a/queue-3.14/btrfs-fix-inode-eviction-infinite-loop-after-cloning-into-it.patch b/queue-3.14/btrfs-fix-inode-eviction-infinite-loop-after-cloning-into-it.patch new file mode 100644 index 00000000000..15ebc4b89ed --- /dev/null +++ b/queue-3.14/btrfs-fix-inode-eviction-infinite-loop-after-cloning-into-it.patch @@ -0,0 +1,103 @@ +From ccccf3d67294714af2d72a6fd6fd7d73b01c9329 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 30 Mar 2015 18:23:59 +0100 +Subject: Btrfs: fix inode eviction infinite loop after cloning into it + +From: Filipe Manana + +commit ccccf3d67294714af2d72a6fd6fd7d73b01c9329 upstream. 
+ +If we attempt to clone a 0 length region into a file we can end up +inserting a range in the inode's extent_io tree with a start offset +that is greater then the end offset, which triggers immediately the +following warning: + +[ 3914.619057] WARNING: CPU: 17 PID: 4199 at fs/btrfs/extent_io.c:435 insert_state+0x4b/0x10b [btrfs]() +[ 3914.620886] BTRFS: end < start 4095 4096 +(...) +[ 3914.638093] Call Trace: +[ 3914.638636] [] dump_stack+0x4c/0x65 +[ 3914.639620] [] warn_slowpath_common+0xa1/0xbb +[ 3914.640789] [] ? insert_state+0x4b/0x10b [btrfs] +[ 3914.642041] [] warn_slowpath_fmt+0x46/0x48 +[ 3914.643236] [] insert_state+0x4b/0x10b [btrfs] +[ 3914.644441] [] __set_extent_bit+0x107/0x3f4 [btrfs] +[ 3914.645711] [] lock_extent_bits+0x65/0x1bf [btrfs] +[ 3914.646914] [] ? _raw_spin_unlock+0x28/0x33 +[ 3914.648058] [] ? test_range_bit+0xcc/0xde [btrfs] +[ 3914.650105] [] lock_extent+0x13/0x15 [btrfs] +[ 3914.651361] [] lock_extent_range+0x3d/0xcd [btrfs] +[ 3914.652761] [] btrfs_ioctl_clone+0x278/0x388 [btrfs] +[ 3914.654128] [] ? might_fault+0x58/0xb5 +[ 3914.655320] [] btrfs_ioctl+0xb51/0x2195 [btrfs] +(...) +[ 3914.669271] ---[ end trace 14843d3e2e622fc1 ]--- + +This later makes the inode eviction handler enter an infinite loop that +keeps dumping the following warning over and over: + +[ 3915.117629] WARNING: CPU: 22 PID: 4228 at fs/btrfs/extent_io.c:435 insert_state+0x4b/0x10b [btrfs]() +[ 3915.119913] BTRFS: end < start 4095 4096 +(...) +[ 3915.137394] Call Trace: +[ 3915.137913] [] dump_stack+0x4c/0x65 +[ 3915.139154] [] warn_slowpath_common+0xa1/0xbb +[ 3915.140316] [] ? insert_state+0x4b/0x10b [btrfs] +[ 3915.141505] [] warn_slowpath_fmt+0x46/0x48 +[ 3915.142709] [] insert_state+0x4b/0x10b [btrfs] +[ 3915.143849] [] __set_extent_bit+0x107/0x3f4 [btrfs] +[ 3915.145120] [] ? btrfs_kill_super+0x17/0x23 [btrfs] +[ 3915.146352] [] ? deactivate_locked_super+0x3b/0x50 +[ 3915.147565] [] lock_extent_bits+0x65/0x1bf [btrfs] +[ 3915.148785] [] ? 
_raw_write_unlock+0x28/0x33 +[ 3915.149931] [] btrfs_evict_inode+0x196/0x482 [btrfs] +[ 3915.151154] [] evict+0xa0/0x148 +[ 3915.152094] [] dispose_list+0x39/0x43 +[ 3915.153081] [] evict_inodes+0xdc/0xeb +[ 3915.154062] [] generic_shutdown_super+0x49/0xef +[ 3915.155193] [] kill_anon_super+0x13/0x1e +[ 3915.156274] [] btrfs_kill_super+0x17/0x23 [btrfs] +(...) +[ 3915.167404] ---[ end trace 14843d3e2e622fc2 ]--- + +So just bail out of the clone ioctl if the length of the region to clone +is zero, without locking any extent range, in order to prevent this issue +(same behaviour as a pwrite with a 0 length for example). + +This is trivial to reproduce. For example, the steps for the test I just +made for fstests: + + mkfs.btrfs -f SCRATCH_DEV + mount SCRATCH_DEV $SCRATCH_MNT + + touch $SCRATCH_MNT/foo + touch $SCRATCH_MNT/bar + + $CLONER_PROG -s 0 -d 4096 -l 0 $SCRATCH_MNT/foo $SCRATCH_MNT/bar + umount $SCRATCH_MNT + +A test case for fstests follows soon. + +Signed-off-by: Filipe Manana +Reviewed-by: Omar Sandoval +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/ioctl.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -3226,6 +3226,11 @@ static noinline long btrfs_ioctl_clone(s + if (off + len == src->i_size) + len = ALIGN(src->i_size, bs) - off; + ++ if (len == 0) { ++ ret = 0; ++ goto out_unlock; ++ } ++ + /* verify the end result is block aligned */ + if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || + !IS_ALIGNED(destoff, bs)) diff --git a/queue-3.14/btrfs-fix-inode-eviction-infinite-loop-after-extent_same-ioctl.patch b/queue-3.14/btrfs-fix-inode-eviction-infinite-loop-after-extent_same-ioctl.patch new file mode 100644 index 00000000000..626c43ba06b --- /dev/null +++ b/queue-3.14/btrfs-fix-inode-eviction-infinite-loop-after-extent_same-ioctl.patch @@ -0,0 +1,49 @@ +From 113e8283869b9855c8b999796aadd506bbac155f Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 30 Mar 
2015 18:26:47 +0100 +Subject: Btrfs: fix inode eviction infinite loop after extent_same ioctl + +From: Filipe Manana + +commit 113e8283869b9855c8b999796aadd506bbac155f upstream. + +If we pass a length of 0 to the extent_same ioctl, we end up locking an +extent range with a start offset greater then its end offset (if the +destination file's offset is greater than zero). This results in a warning +from extent_io.c:insert_state through the following call chain: + + btrfs_extent_same() + btrfs_double_lock() + lock_extent_range() + lock_extent(inode->io_tree, offset, offset + len - 1) + lock_extent_bits() + __set_extent_bit() + insert_state() + --> WARN_ON(end < start) + +This leads to an infinite loop when evicting the inode. This is the same +problem that my previous patch titled +"Btrfs: fix inode eviction infinite loop after cloning into it" addressed +but for the extent_same ioctl instead of the clone ioctl. + +Signed-off-by: Filipe Manana +Reviewed-by: Omar Sandoval +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/ioctl.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -2708,6 +2708,9 @@ static int btrfs_extent_same(struct inod + if (src == dst) + return -EINVAL; + ++ if (len == 0) ++ return 0; ++ + btrfs_double_lock(src, loff, dst, dst_loff, len); + + ret = extent_same_check_offsets(src, loff, len); diff --git a/queue-3.14/btrfs-fix-log-tree-corruption-when-fs-mounted-with-o-discard.patch b/queue-3.14/btrfs-fix-log-tree-corruption-when-fs-mounted-with-o-discard.patch new file mode 100644 index 00000000000..aed73b2cd27 --- /dev/null +++ b/queue-3.14/btrfs-fix-log-tree-corruption-when-fs-mounted-with-o-discard.patch @@ -0,0 +1,57 @@ +From dcc82f4783ad91d4ab654f89f37ae9291cdc846a Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 23 Mar 2015 14:07:40 +0000 +Subject: Btrfs: fix log tree corruption when fs mounted with -o discard + +From: Filipe Manana + +commit 
dcc82f4783ad91d4ab654f89f37ae9291cdc846a upstream. + +While committing a transaction we free the log roots before we write the +new super block. Freeing the log roots implies marking the disk location +of every node/leaf (metadata extent) as pinned before the new super block +is written. This is to prevent the disk location of log metadata extents +from being reused before the new super block is written, otherwise we +would have a corrupted log tree if before the new super block is written +a crash/reboot happens and the location of any log tree metadata extent +ended up being reused and rewritten. + +Even though we pinned the log tree's metadata extents, we were issuing a +discard against them if the fs was mounted with the -o discard option, +resulting in corruption of the log tree if a crash/reboot happened before +writing the new super block - the next time the fs was mounted, during +the log replay process we would find nodes/leafs of the log btree with +a content full of zeroes, causing the process to fail and require the +use of the tool btrfs-zero-log to wipeout the log tree (and all data +previously fsynced becoming lost forever). + +Fix this by not doing a discard when pinning an extent. The discard will +be done later when it's safe (after the new super block is committed) at +extent-tree.c:btrfs_finish_extent_commit(). 
+ +Fixes: e688b7252f78 (Btrfs: fix extent pinning bugs in the tree log) +Signed-off-by: Filipe Manana +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/extent-tree.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -6645,12 +6645,11 @@ static int __btrfs_free_reserved_extent( + return -ENOSPC; + } + +- if (btrfs_test_opt(root, DISCARD)) +- ret = btrfs_discard_extent(root, start, len, NULL); +- + if (pin) + pin_down_extent(root, cache, start, len, 1); + else { ++ if (btrfs_test_opt(root, DISCARD)) ++ ret = btrfs_discard_extent(root, start, len, NULL); + btrfs_add_free_space(cache, start, len); + btrfs_update_reserved_bytes(cache, len, RESERVE_FREE); + } diff --git a/queue-3.14/sched-idle-x86-optimize-unnecessary-mwait_idle-resched-ipis.patch b/queue-3.14/sched-idle-x86-optimize-unnecessary-mwait_idle-resched-ipis.patch new file mode 100644 index 00000000000..0b177f28dd6 --- /dev/null +++ b/queue-3.14/sched-idle-x86-optimize-unnecessary-mwait_idle-resched-ipis.patch @@ -0,0 +1,72 @@ +From f8e617f4582995f7c25ef25b4167213120ad122b Mon Sep 17 00:00:00 2001 +From: Mike Galbraith +Date: Sat, 18 Jan 2014 17:14:44 +0100 +Subject: sched/idle/x86: Optimize unnecessary mwait_idle() resched IPIs + +From: Mike Galbraith + +commit f8e617f4582995f7c25ef25b4167213120ad122b upstream. + +To fully take advantage of MWAIT, apparently the CLFLUSH instruction needs +another quirk on certain CPUs: proper barriers around it on certain machines. + +On a Q6600 SMP system, pipe-test scheduling performance, cross core, +improves significantly: + + 3.8.13 487.2 KHz 1.000 + 3.13.0-master 415.5 KHz .852 + 3.13.0-master+ 415.2 KHz .852 + restore mwait_idle + 3.13.0-master++ 488.5 KHz 1.002 + restore mwait_idle + IPI fix + +Since X86_BUG_CLFLUSH_MONITOR is already a quirk, don't create a separate +quirk for the extra smp_mb()s. + +Signed-off-by: Mike Galbraith +Cc: Borislav Petkov +Cc: H. 
Peter Anvin +Cc: Ian Malone +Cc: Josh Boyer +Cc: Len Brown +Cc: Len Brown +Cc: Linus Torvalds +Cc: Mike Galbraith +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/1390061684.5566.4.camel@marge.simpson.net +[ Ported to recent kernel, added comments about the quirk. ] +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/process.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -428,18 +428,22 @@ static int prefer_mwait_c1_over_halt(con + + static void mwait_idle(void) + { +- if (!need_resched()) { +- if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) ++ if (!current_set_polling_and_test()) { ++ if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) { ++ smp_mb(); /* quirk */ + clflush((void *)&current_thread_info()->flags); ++ smp_mb(); /* quirk */ ++ } + + __monitor((void *)&current_thread_info()->flags, 0, 0); +- smp_mb(); + if (!need_resched()) + __sti_mwait(0, 0); + else + local_irq_enable(); +- } else ++ } else { + local_irq_enable(); ++ } ++ __current_clr_polling(); + } + + void select_idle_routine(const struct cpuinfo_x86 *c) diff --git a/queue-3.14/sched-idle-x86-restore-mwait_idle-to-fix-boot-hangs-to-improve-power-savings-and-to-improve-performance.patch b/queue-3.14/sched-idle-x86-restore-mwait_idle-to-fix-boot-hangs-to-improve-power-savings-and-to-improve-performance.patch new file mode 100644 index 00000000000..33fb6dd5509 --- /dev/null +++ b/queue-3.14/sched-idle-x86-restore-mwait_idle-to-fix-boot-hangs-to-improve-power-savings-and-to-improve-performance.patch @@ -0,0 +1,152 @@ +From b253149b843f89cd300cbdbea27ce1f847506f99 Mon Sep 17 00:00:00 2001 +From: Len Brown +Date: Wed, 15 Jan 2014 00:37:34 -0500 +Subject: sched/idle/x86: Restore mwait_idle() to fix boot hangs, to improve power savings and to improve performance + +From: Len Brown + +commit b253149b843f89cd300cbdbea27ce1f847506f99 upstream. 
+ +In Linux-3.9 we removed the mwait_idle() loop: + + 69fb3676df33 ("x86 idle: remove mwait_idle() and "idle=mwait" cmdline param") + +The reasoning was that modern machines should be sufficiently +happy during the boot process using the default_idle() HALT +loop, until cpuidle loads and either acpi_idle or intel_idle +invoke the newer MWAIT-with-hints idle loop. + +But two machines reported problems: + + 1. Certain Core2-era machines support MWAIT-C1 and HALT only. + MWAIT-C1 is preferred for optimal power and performance. + But if they support just C1, cpuidle never loads and + so they use the boot-time default idle loop forever. + + 2. Some laptops will boot-hang if HALT is used, + but will boot successfully if MWAIT is used. + This appears to be a hidden assumption in BIOS SMI, + that is presumably valid on the proprietary OS + where the BIOS was validated. + + https://bugzilla.kernel.org/show_bug.cgi?id=60770 + +So here we effectively revert the patch above, restoring +the mwait_idle() loop. However, we don't bother restoring +the idle=mwait cmdline parameter, since it appears to add +no value. + +Maintainer notes: + + For 3.9, simply revert 69fb3676df + for 3.10, patch -F3 applies, fuzz needed due to __cpuinit use in + context For 3.11, 3.12, 3.13, this patch applies cleanly + +Tested-by: Mike Galbraith +Signed-off-by: Len Brown +Acked-by: Mike Galbraith +Cc: Borislav Petkov +Cc: H. Peter Anvin +Cc: Ian Malone +Cc: Josh Boyer +Cc: Linus Torvalds +Cc: Mike Galbraith +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/345254a551eb5a6a866e048d7ab570fd2193aca4.1389763084.git.len.brown@intel.com +[ Ported to recent kernels. 
] +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/mwait.h | 8 +++++++ + arch/x86/kernel/process.c | 47 +++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 55 insertions(+) + +--- a/arch/x86/include/asm/mwait.h ++++ b/arch/x86/include/asm/mwait.h +@@ -30,6 +30,14 @@ static inline void __mwait(unsigned long + :: "a" (eax), "c" (ecx)); + } + ++static inline void __sti_mwait(unsigned long eax, unsigned long ecx) ++{ ++ trace_hardirqs_on(); ++ /* "mwait %eax, %ecx;" */ ++ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" ++ :: "a" (eax), "c" (ecx)); ++} ++ + /* + * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, + * which can obviate IPI to trigger checking of need_resched. +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -398,6 +399,49 @@ static void amd_e400_idle(void) + default_idle(); + } + ++/* ++ * Intel Core2 and older machines prefer MWAIT over HALT for C1. ++ * We can't rely on cpuidle installing MWAIT, because it will not load ++ * on systems that support only C1 -- so the boot default must be MWAIT. ++ * ++ * Some AMD machines are the opposite, they depend on using HALT. ++ * ++ * So for default C1, which is used during boot until cpuidle loads, ++ * use MWAIT-C1 on Intel HW that has it, else use HALT. ++ */ ++static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) ++{ ++ if (c->x86_vendor != X86_VENDOR_INTEL) ++ return 0; ++ ++ if (!cpu_has(c, X86_FEATURE_MWAIT)) ++ return 0; ++ ++ return 1; ++} ++ ++/* ++ * MONITOR/MWAIT with no hints, used for default default C1 state. ++ * This invokes MWAIT with interrutps enabled and no flags, ++ * which is backwards compatible with the original MWAIT implementation. 
++ */ ++ ++static void mwait_idle(void) ++{ ++ if (!need_resched()) { ++ if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) ++ clflush((void *)&current_thread_info()->flags); ++ ++ __monitor((void *)&current_thread_info()->flags, 0, 0); ++ smp_mb(); ++ if (!need_resched()) ++ __sti_mwait(0, 0); ++ else ++ local_irq_enable(); ++ } else ++ local_irq_enable(); ++} ++ + void select_idle_routine(const struct cpuinfo_x86 *c) + { + #ifdef CONFIG_SMP +@@ -411,6 +455,9 @@ void select_idle_routine(const struct cp + /* E400: APIC timer interrupt does not wake up CPU from C1e */ + pr_info("using AMD E400 aware idle routine\n"); + x86_idle = amd_e400_idle; ++ } else if (prefer_mwait_c1_over_halt(c)) { ++ pr_info("using mwait in idle threads\n"); ++ x86_idle = mwait_idle; + } else + x86_idle = default_idle; + } diff --git a/queue-3.14/series b/queue-3.14/series index 9261bc5f49d..971804c6269 100644 --- a/queue-3.14/series +++ b/queue-3.14/series @@ -3,3 +3,9 @@ tcp-fix-possible-deadlock-in-tcp_send_fin.patch tcp-avoid-looping-in-tcp_send_fin.patch net-do-not-deplete-pfmemalloc-reserve.patch net-fix-crash-in-build_skb.patch +sched-idle-x86-restore-mwait_idle-to-fix-boot-hangs-to-improve-power-savings-and-to-improve-performance.patch +sched-idle-x86-optimize-unnecessary-mwait_idle-resched-ipis.patch +btrfs-fix-log-tree-corruption-when-fs-mounted-with-o-discard.patch +btrfs-don-t-accept-bare-namespace-as-a-valid-xattr.patch +btrfs-fix-inode-eviction-infinite-loop-after-cloning-into-it.patch +btrfs-fix-inode-eviction-infinite-loop-after-extent_same-ioctl.patch -- 2.47.3