From: Greg Kroah-Hartman Date: Wed, 29 Apr 2015 12:11:27 +0000 (+0200) Subject: 3.19-stable patches X-Git-Tag: v3.10.77~61 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b7eef177a8d33b38d7b18953f0eb0e33004bfc8e;p=thirdparty%2Fkernel%2Fstable-queue.git 3.19-stable patches added patches: btrfs-don-t-accept-bare-namespace-as-a-valid-xattr.patch btrfs-fix-inode-eviction-infinite-loop-after-cloning-into-it.patch btrfs-fix-inode-eviction-infinite-loop-after-extent_same-ioctl.patch btrfs-fix-log-tree-corruption-when-fs-mounted-with-o-discard.patch kvm-x86-fix-msr_ia32_bndcfgs-in-msrs_to_save.patch md-fix-md-io-stats-accounting-broken.patch perf-x86-intel-fix-core2-atom-nhm-wsm-cycles-pp-events.patch sched-idle-x86-optimize-unnecessary-mwait_idle-resched-ipis.patch sched-idle-x86-restore-mwait_idle-to-fix-boot-hangs-to-improve-power-savings-and-to-improve-performance.patch x86-asm-decoder-fix-and-enforce-max-instruction-size-in-the-insn-decoder.patch --- diff --git a/queue-3.19/btrfs-don-t-accept-bare-namespace-as-a-valid-xattr.patch b/queue-3.19/btrfs-don-t-accept-bare-namespace-as-a-valid-xattr.patch new file mode 100644 index 00000000000..9328adf5b35 --- /dev/null +++ b/queue-3.19/btrfs-don-t-accept-bare-namespace-as-a-valid-xattr.patch @@ -0,0 +1,134 @@ +From 3c3b04d10ff1811a27f86684ccd2f5ba6983211d Mon Sep 17 00:00:00 2001 +From: David Sterba +Date: Wed, 25 Mar 2015 19:26:41 +0100 +Subject: btrfs: don't accept bare namespace as a valid xattr + +From: David Sterba + +commit 3c3b04d10ff1811a27f86684ccd2f5ba6983211d upstream. + +Due to insufficient check in btrfs_is_valid_xattr, this unexpectedly +works: + + $ touch file + $ setfattr -n user. -v 1 file + $ getfattr -d file +user.="1" + +ie. the missing attribute name after the namespace. 
+ +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=94291 +Reported-by: William Douglas +Signed-off-by: David Sterba +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/xattr.c | 53 +++++++++++++++++++++++++++++++++++++++-------------- + 1 file changed, 39 insertions(+), 14 deletions(-) + +--- a/fs/btrfs/xattr.c ++++ b/fs/btrfs/xattr.c +@@ -360,22 +360,42 @@ const struct xattr_handler *btrfs_xattr_ + /* + * Check if the attribute is in a supported namespace. + * +- * This applied after the check for the synthetic attributes in the system ++ * This is applied after the check for the synthetic attributes in the system + * namespace. + */ +-static bool btrfs_is_valid_xattr(const char *name) ++static int btrfs_is_valid_xattr(const char *name) + { +- return !strncmp(name, XATTR_SECURITY_PREFIX, +- XATTR_SECURITY_PREFIX_LEN) || +- !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) || +- !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || +- !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) || +- !strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN); ++ int len = strlen(name); ++ int prefixlen = 0; ++ ++ if (!strncmp(name, XATTR_SECURITY_PREFIX, ++ XATTR_SECURITY_PREFIX_LEN)) ++ prefixlen = XATTR_SECURITY_PREFIX_LEN; ++ else if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) ++ prefixlen = XATTR_SYSTEM_PREFIX_LEN; ++ else if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) ++ prefixlen = XATTR_TRUSTED_PREFIX_LEN; ++ else if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) ++ prefixlen = XATTR_USER_PREFIX_LEN; ++ else if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN)) ++ prefixlen = XATTR_BTRFS_PREFIX_LEN; ++ else ++ return -EOPNOTSUPP; ++ ++ /* ++ * The name cannot consist of just prefix ++ */ ++ if (len <= prefixlen) ++ return -EINVAL; ++ ++ return 0; + } + + ssize_t btrfs_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t 
size) + { ++ int ret; ++ + /* + * If this is a request for a synthetic attribute in the system.* + * namespace use the generic infrastructure to resolve a handler +@@ -384,8 +404,9 @@ ssize_t btrfs_getxattr(struct dentry *de + if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return generic_getxattr(dentry, name, buffer, size); + +- if (!btrfs_is_valid_xattr(name)) +- return -EOPNOTSUPP; ++ ret = btrfs_is_valid_xattr(name); ++ if (ret) ++ return ret; + return __btrfs_getxattr(dentry->d_inode, name, buffer, size); + } + +@@ -393,6 +414,7 @@ int btrfs_setxattr(struct dentry *dentry + size_t size, int flags) + { + struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; ++ int ret; + + /* + * The permission on security.* and system.* is not checked +@@ -409,8 +431,9 @@ int btrfs_setxattr(struct dentry *dentry + if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return generic_setxattr(dentry, name, value, size, flags); + +- if (!btrfs_is_valid_xattr(name)) +- return -EOPNOTSUPP; ++ ret = btrfs_is_valid_xattr(name); ++ if (ret) ++ return ret; + + if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN)) + return btrfs_set_prop(dentry->d_inode, name, +@@ -426,6 +449,7 @@ int btrfs_setxattr(struct dentry *dentry + int btrfs_removexattr(struct dentry *dentry, const char *name) + { + struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root; ++ int ret; + + /* + * The permission on security.* and system.* is not checked +@@ -442,8 +466,9 @@ int btrfs_removexattr(struct dentry *den + if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return generic_removexattr(dentry, name); + +- if (!btrfs_is_valid_xattr(name)) +- return -EOPNOTSUPP; ++ ret = btrfs_is_valid_xattr(name); ++ if (ret) ++ return ret; + + if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN)) + return btrfs_set_prop(dentry->d_inode, name, diff --git a/queue-3.19/btrfs-fix-inode-eviction-infinite-loop-after-cloning-into-it.patch 
b/queue-3.19/btrfs-fix-inode-eviction-infinite-loop-after-cloning-into-it.patch new file mode 100644 index 00000000000..1804e0175b3 --- /dev/null +++ b/queue-3.19/btrfs-fix-inode-eviction-infinite-loop-after-cloning-into-it.patch @@ -0,0 +1,103 @@ +From ccccf3d67294714af2d72a6fd6fd7d73b01c9329 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 30 Mar 2015 18:23:59 +0100 +Subject: Btrfs: fix inode eviction infinite loop after cloning into it + +From: Filipe Manana + +commit ccccf3d67294714af2d72a6fd6fd7d73b01c9329 upstream. + +If we attempt to clone a 0 length region into a file we can end up +inserting a range in the inode's extent_io tree with a start offset +that is greater then the end offset, which triggers immediately the +following warning: + +[ 3914.619057] WARNING: CPU: 17 PID: 4199 at fs/btrfs/extent_io.c:435 insert_state+0x4b/0x10b [btrfs]() +[ 3914.620886] BTRFS: end < start 4095 4096 +(...) +[ 3914.638093] Call Trace: +[ 3914.638636] [] dump_stack+0x4c/0x65 +[ 3914.639620] [] warn_slowpath_common+0xa1/0xbb +[ 3914.640789] [] ? insert_state+0x4b/0x10b [btrfs] +[ 3914.642041] [] warn_slowpath_fmt+0x46/0x48 +[ 3914.643236] [] insert_state+0x4b/0x10b [btrfs] +[ 3914.644441] [] __set_extent_bit+0x107/0x3f4 [btrfs] +[ 3914.645711] [] lock_extent_bits+0x65/0x1bf [btrfs] +[ 3914.646914] [] ? _raw_spin_unlock+0x28/0x33 +[ 3914.648058] [] ? test_range_bit+0xcc/0xde [btrfs] +[ 3914.650105] [] lock_extent+0x13/0x15 [btrfs] +[ 3914.651361] [] lock_extent_range+0x3d/0xcd [btrfs] +[ 3914.652761] [] btrfs_ioctl_clone+0x278/0x388 [btrfs] +[ 3914.654128] [] ? might_fault+0x58/0xb5 +[ 3914.655320] [] btrfs_ioctl+0xb51/0x2195 [btrfs] +(...) 
+[ 3914.669271] ---[ end trace 14843d3e2e622fc1 ]--- + +This later makes the inode eviction handler enter an infinite loop that +keeps dumping the following warning over and over: + +[ 3915.117629] WARNING: CPU: 22 PID: 4228 at fs/btrfs/extent_io.c:435 insert_state+0x4b/0x10b [btrfs]() +[ 3915.119913] BTRFS: end < start 4095 4096 +(...) +[ 3915.137394] Call Trace: +[ 3915.137913] [] dump_stack+0x4c/0x65 +[ 3915.139154] [] warn_slowpath_common+0xa1/0xbb +[ 3915.140316] [] ? insert_state+0x4b/0x10b [btrfs] +[ 3915.141505] [] warn_slowpath_fmt+0x46/0x48 +[ 3915.142709] [] insert_state+0x4b/0x10b [btrfs] +[ 3915.143849] [] __set_extent_bit+0x107/0x3f4 [btrfs] +[ 3915.145120] [] ? btrfs_kill_super+0x17/0x23 [btrfs] +[ 3915.146352] [] ? deactivate_locked_super+0x3b/0x50 +[ 3915.147565] [] lock_extent_bits+0x65/0x1bf [btrfs] +[ 3915.148785] [] ? _raw_write_unlock+0x28/0x33 +[ 3915.149931] [] btrfs_evict_inode+0x196/0x482 [btrfs] +[ 3915.151154] [] evict+0xa0/0x148 +[ 3915.152094] [] dispose_list+0x39/0x43 +[ 3915.153081] [] evict_inodes+0xdc/0xeb +[ 3915.154062] [] generic_shutdown_super+0x49/0xef +[ 3915.155193] [] kill_anon_super+0x13/0x1e +[ 3915.156274] [] btrfs_kill_super+0x17/0x23 [btrfs] +(...) +[ 3915.167404] ---[ end trace 14843d3e2e622fc2 ]--- + +So just bail out of the clone ioctl if the length of the region to clone +is zero, without locking any extent range, in order to prevent this issue +(same behaviour as a pwrite with a 0 length for example). + +This is trivial to reproduce. For example, the steps for the test I just +made for fstests: + + mkfs.btrfs -f SCRATCH_DEV + mount SCRATCH_DEV $SCRATCH_MNT + + touch $SCRATCH_MNT/foo + touch $SCRATCH_MNT/bar + + $CLONER_PROG -s 0 -d 4096 -l 0 $SCRATCH_MNT/foo $SCRATCH_MNT/bar + umount $SCRATCH_MNT + +A test case for fstests follows soon. 
+ +Signed-off-by: Filipe Manana +Reviewed-by: Omar Sandoval +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/ioctl.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -3626,6 +3626,11 @@ static noinline long btrfs_ioctl_clone(s + if (off + len == src->i_size) + len = ALIGN(src->i_size, bs) - off; + ++ if (len == 0) { ++ ret = 0; ++ goto out_unlock; ++ } ++ + /* verify the end result is block aligned */ + if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || + !IS_ALIGNED(destoff, bs)) diff --git a/queue-3.19/btrfs-fix-inode-eviction-infinite-loop-after-extent_same-ioctl.patch b/queue-3.19/btrfs-fix-inode-eviction-infinite-loop-after-extent_same-ioctl.patch new file mode 100644 index 00000000000..63c518dc96e --- /dev/null +++ b/queue-3.19/btrfs-fix-inode-eviction-infinite-loop-after-extent_same-ioctl.patch @@ -0,0 +1,49 @@ +From 113e8283869b9855c8b999796aadd506bbac155f Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 30 Mar 2015 18:26:47 +0100 +Subject: Btrfs: fix inode eviction infinite loop after extent_same ioctl + +From: Filipe Manana + +commit 113e8283869b9855c8b999796aadd506bbac155f upstream. + +If we pass a length of 0 to the extent_same ioctl, we end up locking an +extent range with a start offset greater then its end offset (if the +destination file's offset is greater than zero). This results in a warning +from extent_io.c:insert_state through the following call chain: + + btrfs_extent_same() + btrfs_double_lock() + lock_extent_range() + lock_extent(inode->io_tree, offset, offset + len - 1) + lock_extent_bits() + __set_extent_bit() + insert_state() + --> WARN_ON(end < start) + +This leads to an infinite loop when evicting the inode. This is the same +problem that my previous patch titled +"Btrfs: fix inode eviction infinite loop after cloning into it" addressed +but for the extent_same ioctl instead of the clone ioctl. 
+ +Signed-off-by: Filipe Manana +Reviewed-by: Omar Sandoval +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/ioctl.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -2897,6 +2897,9 @@ static int btrfs_extent_same(struct inod + if (src == dst) + return -EINVAL; + ++ if (len == 0) ++ return 0; ++ + btrfs_double_lock(src, loff, dst, dst_loff, len); + + ret = extent_same_check_offsets(src, loff, len); diff --git a/queue-3.19/btrfs-fix-log-tree-corruption-when-fs-mounted-with-o-discard.patch b/queue-3.19/btrfs-fix-log-tree-corruption-when-fs-mounted-with-o-discard.patch new file mode 100644 index 00000000000..07e2e4ef0c3 --- /dev/null +++ b/queue-3.19/btrfs-fix-log-tree-corruption-when-fs-mounted-with-o-discard.patch @@ -0,0 +1,57 @@ +From dcc82f4783ad91d4ab654f89f37ae9291cdc846a Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Mon, 23 Mar 2015 14:07:40 +0000 +Subject: Btrfs: fix log tree corruption when fs mounted with -o discard + +From: Filipe Manana + +commit dcc82f4783ad91d4ab654f89f37ae9291cdc846a upstream. + +While committing a transaction we free the log roots before we write the +new super block. Freeing the log roots implies marking the disk location +of every node/leaf (metadata extent) as pinned before the new super block +is written. This is to prevent the disk location of log metadata extents +from being reused before the new super block is written, otherwise we +would have a corrupted log tree if before the new super block is written +a crash/reboot happens and the location of any log tree metadata extent +ended up being reused and rewritten. 
+ +Even though we pinned the log tree's metadata extents, we were issuing a +discard against them if the fs was mounted with the -o discard option, +resulting in corruption of the log tree if a crash/reboot happened before +writing the new super block - the next time the fs was mounted, during +the log replay process we would find nodes/leafs of the log btree with +a content full of zeroes, causing the process to fail and require the +use of the tool btrfs-zero-log to wipeout the log tree (and all data +previously fsynced becoming lost forever). + +Fix this by not doing a discard when pinning an extent. The discard will +be done later when it's safe (after the new super block is committed) at +extent-tree.c:btrfs_finish_extent_commit(). + +Fixes: e688b7252f78 (Btrfs: fix extent pinning bugs in the tree log) +Signed-off-by: Filipe Manana +Signed-off-by: Chris Mason +Signed-off-by: Greg Kroah-Hartman + +--- + fs/btrfs/extent-tree.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/fs/btrfs/extent-tree.c ++++ b/fs/btrfs/extent-tree.c +@@ -6967,12 +6967,11 @@ static int __btrfs_free_reserved_extent( + return -ENOSPC; + } + +- if (btrfs_test_opt(root, DISCARD)) +- ret = btrfs_discard_extent(root, start, len, NULL); +- + if (pin) + pin_down_extent(root, cache, start, len, 1); + else { ++ if (btrfs_test_opt(root, DISCARD)) ++ ret = btrfs_discard_extent(root, start, len, NULL); + btrfs_add_free_space(cache, start, len); + btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); + } diff --git a/queue-3.19/kvm-x86-fix-msr_ia32_bndcfgs-in-msrs_to_save.patch b/queue-3.19/kvm-x86-fix-msr_ia32_bndcfgs-in-msrs_to_save.patch new file mode 100644 index 00000000000..78d59aeb9fd --- /dev/null +++ b/queue-3.19/kvm-x86-fix-msr_ia32_bndcfgs-in-msrs_to_save.patch @@ -0,0 +1,51 @@ +From 9e9c3fe40bcd28e3f98f0ad8408435f4503f2781 Mon Sep 17 00:00:00 2001 +From: Nadav Amit +Date: Sun, 12 Apr 2015 21:47:15 +0300 +Subject: KVM: x86: Fix MSR_IA32_BNDCFGS in 
msrs_to_save + +From: Nadav Amit + +commit 9e9c3fe40bcd28e3f98f0ad8408435f4503f2781 upstream. + +kvm_init_msr_list is currently called before hardware_setup. As a result, +vmx_mpx_supported always returns false when kvm_init_msr_list checks whether to +save MSR_IA32_BNDCFGS. + +Move kvm_init_msr_list after vmx_hardware_setup is called to fix this issue. + +Signed-off-by: Nadav Amit +Signed-off-by: Greg Kroah-Hartman + +Message-Id: <1428864435-4732-1-git-send-email-namit@cs.technion.ac.il> +Signed-off-by: Paolo Bonzini + +--- + arch/x86/kvm/x86.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -5781,7 +5781,6 @@ int kvm_arch_init(void *opaque) + kvm_set_mmio_spte_mask(); + + kvm_x86_ops = ops; +- kvm_init_msr_list(); + + kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, + PT_DIRTY_MASK, PT64_NX_MASK, 0); +@@ -7218,7 +7217,14 @@ void kvm_arch_hardware_disable(void) + + int kvm_arch_hardware_setup(void) + { +- return kvm_x86_ops->hardware_setup(); ++ int r; ++ ++ r = kvm_x86_ops->hardware_setup(); ++ if (r != 0) ++ return r; ++ ++ kvm_init_msr_list(); ++ return 0; + } + + void kvm_arch_hardware_unsetup(void) diff --git a/queue-3.19/md-fix-md-io-stats-accounting-broken.patch b/queue-3.19/md-fix-md-io-stats-accounting-broken.patch new file mode 100644 index 00000000000..62629b72d92 --- /dev/null +++ b/queue-3.19/md-fix-md-io-stats-accounting-broken.patch @@ -0,0 +1,59 @@ +From 74672d069b298b03e9f657fd70915e055739882e Mon Sep 17 00:00:00 2001 +From: Gu Zheng +Date: Fri, 3 Apr 2015 08:44:47 +0800 +Subject: md: fix md io stats accounting broken + +From: Gu Zheng + +commit 74672d069b298b03e9f657fd70915e055739882e upstream. + +Simon reported the md io stats accounting issue: +" +I'm seeing "iostat -x -k 1" print this after a RAID1 rebuild on 4.0-rc5. 
+It's not abnormal other than it's 3-disk, with one being SSD (sdc) and +the other two being write-mostly: + +Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await r_await w_await svctm %util +sda 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 +sdb 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 +sdc 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 +md0 0.00 0.00 0.00 0.00 0.00 0.00 0.00 345.00 0.00 0.00 0.00 0.00 100.00 +md2 0.00 0.00 0.00 0.00 0.00 0.00 0.00 58779.00 0.00 0.00 0.00 0.00 100.00 +md1 0.00 0.00 0.00 0.00 0.00 0.00 0.00 12.00 0.00 0.00 0.00 0.00 100.00 +" +The cause is commit "18c0b223cf9901727ef3b02da6711ac930b4e5d4" uses the +generic_start_io_acct to account the disk stats rather than the open code, +but it also introduced the increase to .in_flight[rw] which is needless to +md. So we re-use the open code here to fix it. + +Reported-by: Simon Kirby +Signed-off-by: Gu Zheng +Signed-off-by: NeilBrown +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/md/md.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -248,6 +248,7 @@ static void md_make_request(struct reque + const int rw = bio_data_dir(bio); + struct mddev *mddev = q->queuedata; + unsigned int sectors; ++ int cpu; + + if (mddev == NULL || mddev->pers == NULL + || !mddev->ready) { +@@ -283,7 +284,10 @@ static void md_make_request(struct reque + sectors = bio_sectors(bio); + mddev->pers->make_request(mddev, bio); + +- generic_start_io_acct(rw, sectors, &mddev->gendisk->part0); ++ cpu = part_stat_lock(); ++ part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); ++ part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors); ++ part_stat_unlock(); + + if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended) + wake_up(&mddev->sb_wait); diff --git a/queue-3.19/perf-x86-intel-fix-core2-atom-nhm-wsm-cycles-pp-events.patch 
b/queue-3.19/perf-x86-intel-fix-core2-atom-nhm-wsm-cycles-pp-events.patch new file mode 100644 index 00000000000..15adbd01a21 --- /dev/null +++ b/queue-3.19/perf-x86-intel-fix-core2-atom-nhm-wsm-cycles-pp-events.patch @@ -0,0 +1,70 @@ +From 517e6341fa123ec3a2f9ea78ad547be910529881 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Sat, 11 Apr 2015 12:16:22 +0200 +Subject: perf/x86/intel: Fix Core2,Atom,NHM,WSM cycles:pp events + +From: Peter Zijlstra + +commit 517e6341fa123ec3a2f9ea78ad547be910529881 upstream. + +Ingo reported that cycles:pp didn't work for him on some machines. + +It turns out that in this commit: + + af4bdcf675cf perf/x86/intel: Disallow flags for most Core2/Atom/Nehalem/Westmere events + +Andi forgot to explicitly allow that event when he +disabled event flags for PEBS on those uarchs. + +Reported-by: Ingo Molnar +Signed-off-by: Peter Zijlstra (Intel) +Cc: Arnaldo Carvalho de Melo +Cc: Jiri Olsa +Cc: Linus Torvalds +Cc: Peter Zijlstra +Fixes: af4bdcf675cf ("perf/x86/intel: Disallow flags for most Core2/Atom/Nehalem/Westmere events") +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/cpu/perf_event_intel_ds.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c ++++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c +@@ -557,6 +557,8 @@ struct event_constraint intel_core2_pebs + INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ + INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ ++ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). 
*/ ++ INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01), + EVENT_CONSTRAINT_END + }; + +@@ -564,6 +566,8 @@ struct event_constraint intel_atom_pebs_ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ + INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */ + INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ ++ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ ++ INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01), + EVENT_CONSTRAINT_END + }; + +@@ -587,6 +591,8 @@ struct event_constraint intel_nehalem_pe + INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ + INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ ++ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ ++ INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f), + EVENT_CONSTRAINT_END + }; + +@@ -602,6 +608,8 @@ struct event_constraint intel_westmere_p + INTEL_FLAGS_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ + INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ ++ /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */ ++ INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f), + EVENT_CONSTRAINT_END + }; + diff --git a/queue-3.19/sched-idle-x86-optimize-unnecessary-mwait_idle-resched-ipis.patch b/queue-3.19/sched-idle-x86-optimize-unnecessary-mwait_idle-resched-ipis.patch new file mode 100644 index 00000000000..0b177f28dd6 --- /dev/null +++ b/queue-3.19/sched-idle-x86-optimize-unnecessary-mwait_idle-resched-ipis.patch @@ -0,0 +1,72 @@ +From f8e617f4582995f7c25ef25b4167213120ad122b Mon Sep 17 00:00:00 2001 +From: Mike Galbraith +Date: Sat, 18 Jan 2014 17:14:44 +0100 +Subject: sched/idle/x86: Optimize unnecessary mwait_idle() resched IPIs + +From: Mike Galbraith + +commit f8e617f4582995f7c25ef25b4167213120ad122b upstream. 
+ +To fully take advantage of MWAIT, apparently the CLFLUSH instruction needs +another quirk on certain CPUs: proper barriers around it on certain machines. + +On a Q6600 SMP system, pipe-test scheduling performance, cross core, +improves significantly: + + 3.8.13 487.2 KHz 1.000 + 3.13.0-master 415.5 KHz .852 + 3.13.0-master+ 415.2 KHz .852 + restore mwait_idle + 3.13.0-master++ 488.5 KHz 1.002 + restore mwait_idle + IPI fix + +Since X86_BUG_CLFLUSH_MONITOR is already a quirk, don't create a separate +quirk for the extra smp_mb()s. + +Signed-off-by: Mike Galbraith +Cc: Borislav Petkov +Cc: H. Peter Anvin +Cc: Ian Malone +Cc: Josh Boyer +Cc: Len Brown +Cc: Len Brown +Cc: Linus Torvalds +Cc: Mike Galbraith +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/1390061684.5566.4.camel@marge.simpson.net +[ Ported to recent kernel, added comments about the quirk. ] +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/process.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -428,18 +428,22 @@ static int prefer_mwait_c1_over_halt(con + + static void mwait_idle(void) + { +- if (!need_resched()) { +- if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) ++ if (!current_set_polling_and_test()) { ++ if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) { ++ smp_mb(); /* quirk */ + clflush((void *)&current_thread_info()->flags); ++ smp_mb(); /* quirk */ ++ } + + __monitor((void *)&current_thread_info()->flags, 0, 0); +- smp_mb(); + if (!need_resched()) + __sti_mwait(0, 0); + else + local_irq_enable(); +- } else ++ } else { + local_irq_enable(); ++ } ++ __current_clr_polling(); + } + + void select_idle_routine(const struct cpuinfo_x86 *c) diff --git a/queue-3.19/sched-idle-x86-restore-mwait_idle-to-fix-boot-hangs-to-improve-power-savings-and-to-improve-performance.patch 
b/queue-3.19/sched-idle-x86-restore-mwait_idle-to-fix-boot-hangs-to-improve-power-savings-and-to-improve-performance.patch new file mode 100644 index 00000000000..33fb6dd5509 --- /dev/null +++ b/queue-3.19/sched-idle-x86-restore-mwait_idle-to-fix-boot-hangs-to-improve-power-savings-and-to-improve-performance.patch @@ -0,0 +1,152 @@ +From b253149b843f89cd300cbdbea27ce1f847506f99 Mon Sep 17 00:00:00 2001 +From: Len Brown +Date: Wed, 15 Jan 2014 00:37:34 -0500 +Subject: sched/idle/x86: Restore mwait_idle() to fix boot hangs, to improve power savings and to improve performance + +From: Len Brown + +commit b253149b843f89cd300cbdbea27ce1f847506f99 upstream. + +In Linux-3.9 we removed the mwait_idle() loop: + + 69fb3676df33 ("x86 idle: remove mwait_idle() and "idle=mwait" cmdline param") + +The reasoning was that modern machines should be sufficiently +happy during the boot process using the default_idle() HALT +loop, until cpuidle loads and either acpi_idle or intel_idle +invoke the newer MWAIT-with-hints idle loop. + +But two machines reported problems: + + 1. Certain Core2-era machines support MWAIT-C1 and HALT only. + MWAIT-C1 is preferred for optimal power and performance. + But if they support just C1, cpuidle never loads and + so they use the boot-time default idle loop forever. + + 2. Some laptops will boot-hang if HALT is used, + but will boot successfully if MWAIT is used. + This appears to be a hidden assumption in BIOS SMI, + that is presumably valid on the proprietary OS + where the BIOS was validated. + + https://bugzilla.kernel.org/show_bug.cgi?id=60770 + +So here we effectively revert the patch above, restoring +the mwait_idle() loop. However, we don't bother restoring +the idle=mwait cmdline parameter, since it appears to add +no value. 
+ +Maintainer notes: + + For 3.9, simply revert 69fb3676df + for 3.10, patch -F3 applies, fuzz needed due to __cpuinit use in + context For 3.11, 3.12, 3.13, this patch applies cleanly + +Tested-by: Mike Galbraith +Signed-off-by: Len Brown +Acked-by: Mike Galbraith +Cc: Borislav Petkov +Cc: H. Peter Anvin +Cc: Ian Malone +Cc: Josh Boyer +Cc: Linus Torvalds +Cc: Mike Galbraith +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/345254a551eb5a6a866e048d7ab570fd2193aca4.1389763084.git.len.brown@intel.com +[ Ported to recent kernels. ] +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/mwait.h | 8 +++++++ + arch/x86/kernel/process.c | 47 +++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 55 insertions(+) + +--- a/arch/x86/include/asm/mwait.h ++++ b/arch/x86/include/asm/mwait.h +@@ -30,6 +30,14 @@ static inline void __mwait(unsigned long + :: "a" (eax), "c" (ecx)); + } + ++static inline void __sti_mwait(unsigned long eax, unsigned long ecx) ++{ ++ trace_hardirqs_on(); ++ /* "mwait %eax, %ecx;" */ ++ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" ++ :: "a" (eax), "c" (ecx)); ++} ++ + /* + * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, + * which can obviate IPI to trigger checking of need_resched. +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -398,6 +399,49 @@ static void amd_e400_idle(void) + default_idle(); + } + ++/* ++ * Intel Core2 and older machines prefer MWAIT over HALT for C1. ++ * We can't rely on cpuidle installing MWAIT, because it will not load ++ * on systems that support only C1 -- so the boot default must be MWAIT. ++ * ++ * Some AMD machines are the opposite, they depend on using HALT. ++ * ++ * So for default C1, which is used during boot until cpuidle loads, ++ * use MWAIT-C1 on Intel HW that has it, else use HALT. 
++ */ ++static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) ++{ ++ if (c->x86_vendor != X86_VENDOR_INTEL) ++ return 0; ++ ++ if (!cpu_has(c, X86_FEATURE_MWAIT)) ++ return 0; ++ ++ return 1; ++} ++ ++/* ++ * MONITOR/MWAIT with no hints, used for default default C1 state. ++ * This invokes MWAIT with interrutps enabled and no flags, ++ * which is backwards compatible with the original MWAIT implementation. ++ */ ++ ++static void mwait_idle(void) ++{ ++ if (!need_resched()) { ++ if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) ++ clflush((void *)&current_thread_info()->flags); ++ ++ __monitor((void *)&current_thread_info()->flags, 0, 0); ++ smp_mb(); ++ if (!need_resched()) ++ __sti_mwait(0, 0); ++ else ++ local_irq_enable(); ++ } else ++ local_irq_enable(); ++} ++ + void select_idle_routine(const struct cpuinfo_x86 *c) + { + #ifdef CONFIG_SMP +@@ -411,6 +455,9 @@ void select_idle_routine(const struct cp + /* E400: APIC timer interrupt does not wake up CPU from C1e */ + pr_info("using AMD E400 aware idle routine\n"); + x86_idle = amd_e400_idle; ++ } else if (prefer_mwait_c1_over_halt(c)) { ++ pr_info("using mwait in idle threads\n"); ++ x86_idle = mwait_idle; + } else + x86_idle = default_idle; + } diff --git a/queue-3.19/series b/queue-3.19/series index 70fb1b52b78..5c8c515c488 100644 --- a/queue-3.19/series +++ b/queue-3.19/series @@ -7,3 +7,13 @@ net-do-not-deplete-pfmemalloc-reserve.patch net-fix-crash-in-build_skb.patch pxa168-fix-double-deallocation-of-managed-resources.patch net-mlx4_en-prevent-setting-invalid-rss-hash-function.patch +md-fix-md-io-stats-accounting-broken.patch +x86-asm-decoder-fix-and-enforce-max-instruction-size-in-the-insn-decoder.patch +sched-idle-x86-restore-mwait_idle-to-fix-boot-hangs-to-improve-power-savings-and-to-improve-performance.patch +sched-idle-x86-optimize-unnecessary-mwait_idle-resched-ipis.patch +perf-x86-intel-fix-core2-atom-nhm-wsm-cycles-pp-events.patch +kvm-x86-fix-msr_ia32_bndcfgs-in-msrs_to_save.patch 
+btrfs-fix-log-tree-corruption-when-fs-mounted-with-o-discard.patch +btrfs-don-t-accept-bare-namespace-as-a-valid-xattr.patch +btrfs-fix-inode-eviction-infinite-loop-after-cloning-into-it.patch +btrfs-fix-inode-eviction-infinite-loop-after-extent_same-ioctl.patch diff --git a/queue-3.19/x86-asm-decoder-fix-and-enforce-max-instruction-size-in-the-insn-decoder.patch b/queue-3.19/x86-asm-decoder-fix-and-enforce-max-instruction-size-in-the-insn-decoder.patch new file mode 100644 index 00000000000..1c1fc1cc94e --- /dev/null +++ b/queue-3.19/x86-asm-decoder-fix-and-enforce-max-instruction-size-in-the-insn-decoder.patch @@ -0,0 +1,62 @@ +From 91e5ed49fca09c2b83b262b9757d1376ee2b46c3 Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Tue, 27 Jan 2015 16:06:02 -0800 +Subject: x86/asm/decoder: Fix and enforce max instruction size in the insn decoder + +From: Andy Lutomirski + +commit 91e5ed49fca09c2b83b262b9757d1376ee2b46c3 upstream. + +x86 instructions cannot exceed 15 bytes, and the instruction +decoder should enforce that. Prior to 6ba48ff46f76, the +instruction length limit was implicitly set to 16, which was an +approximation of 15, but there is currently no limit at all. + +Fix MAX_INSN_SIZE (it should be 15, not 16), and fix the decoder +to reject instructions that exceed MAX_INSN_SIZE. + +Other than potentially confusing some of the decoder sanity +checks, I'm not aware of any actual problems that omitting this +check would cause, nor am I aware of any practical problems +caused by the MAX_INSN_SIZE error. + +Signed-off-by: Andy Lutomirski +Acked-by: Masami Hiramatsu +Cc: Dave Hansen +Fixes: 6ba48ff46f76 ("x86: Remove arbitrary instruction size limit ... 
+Link: http://lkml.kernel.org/r/f8f0bc9b8c58cfd6830f7d88400bf1396cbdcd0f.1422403511.git.luto@amacapital.net +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/insn.h | 2 +- + arch/x86/lib/insn.c | 7 +++++++ + 2 files changed, 8 insertions(+), 1 deletion(-) + +--- a/arch/x86/include/asm/insn.h ++++ b/arch/x86/include/asm/insn.h +@@ -69,7 +69,7 @@ struct insn { + const insn_byte_t *next_byte; + }; + +-#define MAX_INSN_SIZE 16 ++#define MAX_INSN_SIZE 15 + + #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) + #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) +--- a/arch/x86/lib/insn.c ++++ b/arch/x86/lib/insn.c +@@ -52,6 +52,13 @@ + */ + void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) + { ++ /* ++ * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid ++ * even if the input buffer is long enough to hold them. ++ */ ++ if (buf_len > MAX_INSN_SIZE) ++ buf_len = MAX_INSN_SIZE; ++ + memset(insn, 0, sizeof(*insn)); + insn->kaddr = kaddr; + insn->end_kaddr = kaddr + buf_len;