From: Greg Kroah-Hartman Date: Sat, 7 May 2016 04:48:58 +0000 (-0400) Subject: 4.4-stable patches X-Git-Tag: v3.14.69~20 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3297859de8621606b5a6054a586c00b4c01cd1a3;p=thirdparty%2Fkernel%2Fstable-queue.git 4.4-stable patches added patches: arm-cpuidle-pass-on-arm_cpuidle_suspend-s-return-value.patch fs-pnode.c-treat-zero-mnt_group_id-s-as-unequal.patch maintainers-remove-asterisk-from-efi-directory-names.patch propogate_mnt-handle-the-first-propogated-copy-being-a-slave.patch writeback-fix-performance-regression-in-wb_over_bg_thresh.patch x86-tsc-read-all-ratio-bits-from-msr_platform_info.patch --- diff --git a/queue-4.4/arm-cpuidle-pass-on-arm_cpuidle_suspend-s-return-value.patch b/queue-4.4/arm-cpuidle-pass-on-arm_cpuidle_suspend-s-return-value.patch new file mode 100644 index 00000000000..b1b3f683387 --- /dev/null +++ b/queue-4.4/arm-cpuidle-pass-on-arm_cpuidle_suspend-s-return-value.patch @@ -0,0 +1,37 @@ +From 625fe4f8ffc1b915248558481bb94249f6bd411c Mon Sep 17 00:00:00 2001 +From: James Morse +Date: Tue, 26 Apr 2016 12:15:01 +0100 +Subject: ARM: cpuidle: Pass on arm_cpuidle_suspend()'s return value + +From: James Morse + +commit 625fe4f8ffc1b915248558481bb94249f6bd411c upstream. + +arm_cpuidle_suspend() may return -EOPNOTSUPP, or any value returned +by the cpu_ops/cpuidle_ops suspend call. arm_enter_idle_state() doesn't +update 'ret' with this value, meaning we always signal success to +cpuidle_enter_state(), causing it to update the usage counters as if we +succeeded. + +Fixes: 191de17aa3c1 ("ARM64: cpuidle: Replace cpu_suspend by the common ARM/ARM64 function") +Signed-off-by: James Morse +Acked-by: Lorenzo Pieralisi +Acked-by: Daniel Lezcano +Signed-off-by: Rafael J. 
Wysocki +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/cpuidle/cpuidle-arm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/cpuidle/cpuidle-arm.c ++++ b/drivers/cpuidle/cpuidle-arm.c +@@ -50,7 +50,7 @@ static int arm_enter_idle_state(struct c + * call the CPU ops suspend protocol with idle index as a + * parameter. + */ +- arm_cpuidle_suspend(idx); ++ ret = arm_cpuidle_suspend(idx); + + cpu_pm_exit(); + } diff --git a/queue-4.4/fs-pnode.c-treat-zero-mnt_group_id-s-as-unequal.patch b/queue-4.4/fs-pnode.c-treat-zero-mnt_group_id-s-as-unequal.patch new file mode 100644 index 00000000000..6b9dcd21c2c --- /dev/null +++ b/queue-4.4/fs-pnode.c-treat-zero-mnt_group_id-s-as-unequal.patch @@ -0,0 +1,80 @@ +From 7ae8fd0351f912b075149a1e03a017be8b903b9a Mon Sep 17 00:00:00 2001 +From: Maxim Patlasov +Date: Tue, 16 Feb 2016 11:45:33 -0800 +Subject: fs/pnode.c: treat zero mnt_group_id-s as unequal + +From: Maxim Patlasov + +commit 7ae8fd0351f912b075149a1e03a017be8b903b9a upstream. + +propagate_one(m) calculates "type" argument for copy_tree() like this: + +> if (m->mnt_group_id == last_dest->mnt_group_id) { +> type = CL_MAKE_SHARED; +> } else { +> type = CL_SLAVE; +> if (IS_MNT_SHARED(m)) +> type |= CL_MAKE_SHARED; +> } + +The "type" argument then governs clone_mnt() behavior with respect to flags +and mnt_master of new mount. When we iterate through a slave group, it is +possible that both current "m" and "last_dest" are not shared (although, +both are slaves, i.e. have non-NULL mnt_master-s). Then the comparison +above erroneously makes new mount shared and sets its mnt_master to +last_source->mnt_master. The patch fixes the problem by handling zero +mnt_group_id-s as though they are unequal. + +The similar problem exists in the implementation of "else" clause above +when we have to ascend upward in the master/slave tree by calling: + +> last_source = last_source->mnt_master; +> last_dest = last_source->mnt_parent; + +proper number of times. 
The last step is governed by +"n->mnt_group_id != last_dest->mnt_group_id" condition that may lie if +both are zero. The patch fixes this case in the same way as the former one. + +[AV: don't open-code an obvious helper...] + +Signed-off-by: Maxim Patlasov +Signed-off-by: Al Viro +Cc: Seth Forshee +Signed-off-by: Greg Kroah-Hartman + +--- + fs/pnode.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/fs/pnode.c ++++ b/fs/pnode.c +@@ -202,6 +202,11 @@ static struct mount *last_dest, *last_so + static struct mountpoint *mp; + static struct hlist_head *list; + ++static inline bool peers(struct mount *m1, struct mount *m2) ++{ ++ return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id; ++} ++ + static int propagate_one(struct mount *m) + { + struct mount *child; +@@ -212,7 +217,7 @@ static int propagate_one(struct mount *m + /* skip if mountpoint isn't covered by it */ + if (!is_subdir(mp->m_dentry, m->mnt.mnt_root)) + return 0; +- if (m->mnt_group_id == last_dest->mnt_group_id) { ++ if (peers(m, last_dest)) { + type = CL_MAKE_SHARED; + } else { + struct mount *n, *p; +@@ -223,7 +228,7 @@ static int propagate_one(struct mount *m + last_source = last_source->mnt_master; + last_dest = last_source->mnt_parent; + } +- if (n->mnt_group_id != last_dest->mnt_group_id) { ++ if (!peers(n, last_dest)) { + last_source = last_source->mnt_master; + last_dest = last_source->mnt_parent; + } diff --git a/queue-4.4/maintainers-remove-asterisk-from-efi-directory-names.patch b/queue-4.4/maintainers-remove-asterisk-from-efi-directory-names.patch new file mode 100644 index 00000000000..49c2faf23fc --- /dev/null +++ b/queue-4.4/maintainers-remove-asterisk-from-efi-directory-names.patch @@ -0,0 +1,42 @@ +From e8dfe6d8f6762d515fcd4f30577f7bfcf7659887 Mon Sep 17 00:00:00 2001 +From: Matt Fleming +Date: Tue, 3 May 2016 20:29:39 +0100 +Subject: MAINTAINERS: Remove asterisk from EFI directory names + +From: Matt Fleming + +commit 
e8dfe6d8f6762d515fcd4f30577f7bfcf7659887 upstream. + +Mark reported that having asterisks on the end of directory names +confuses get_maintainer.pl when it encounters subdirectories, and that +my name does not appear when run on drivers/firmware/efi/libstub. + +Reported-by: Mark Rutland +Signed-off-by: Matt Fleming +Cc: Ard Biesheuvel +Cc: Catalin Marinas +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: linux-efi@vger.kernel.org +Link: http://lkml.kernel.org/r/1462303781-8686-2-git-send-email-matt@codeblueprint.co.uk +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman + +--- + MAINTAINERS | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -4097,8 +4097,8 @@ F: Documentation/efi-stub.txt + F: arch/ia64/kernel/efi.c + F: arch/x86/boot/compressed/eboot.[ch] + F: arch/x86/include/asm/efi.h +-F: arch/x86/platform/efi/* +-F: drivers/firmware/efi/* ++F: arch/x86/platform/efi/ ++F: drivers/firmware/efi/ + F: include/linux/efi*.h + + EFI VARIABLE FILESYSTEM diff --git a/queue-4.4/propogate_mnt-handle-the-first-propogated-copy-being-a-slave.patch b/queue-4.4/propogate_mnt-handle-the-first-propogated-copy-being-a-slave.patch new file mode 100644 index 00000000000..3ffb254bd65 --- /dev/null +++ b/queue-4.4/propogate_mnt-handle-the-first-propogated-copy-being-a-slave.patch @@ -0,0 +1,131 @@ +From 5ec0811d30378ae104f250bfc9b3640242d81e3f Mon Sep 17 00:00:00 2001 +From: "Eric W. Biederman" +Date: Thu, 5 May 2016 09:29:29 -0500 +Subject: propogate_mnt: Handle the first propogated copy being a slave + +From: Eric W. Biederman + +commit 5ec0811d30378ae104f250bfc9b3640242d81e3f upstream. 
+ +When the first propgated copy was a slave the following oops would result: +> BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 +> IP: [] propagate_one+0xbe/0x1c0 +> PGD bacd4067 PUD bac66067 PMD 0 +> Oops: 0000 [#1] SMP +> Modules linked in: +> CPU: 1 PID: 824 Comm: mount Not tainted 4.6.0-rc5userns+ #1523 +> Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 +> task: ffff8800bb0a8000 ti: ffff8800bac3c000 task.ti: ffff8800bac3c000 +> RIP: 0010:[] [] propagate_one+0xbe/0x1c0 +> RSP: 0018:ffff8800bac3fd38 EFLAGS: 00010283 +> RAX: 0000000000000000 RBX: ffff8800bb77ec00 RCX: 0000000000000010 +> RDX: 0000000000000000 RSI: ffff8800bb58c000 RDI: ffff8800bb58c480 +> RBP: ffff8800bac3fd48 R08: 0000000000000001 R09: 0000000000000000 +> R10: 0000000000001ca1 R11: 0000000000001c9d R12: 0000000000000000 +> R13: ffff8800ba713800 R14: ffff8800bac3fda0 R15: ffff8800bb77ec00 +> FS: 00007f3c0cd9b7e0(0000) GS:ffff8800bfb00000(0000) knlGS:0000000000000000 +> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +> CR2: 0000000000000010 CR3: 00000000bb79d000 CR4: 00000000000006e0 +> Stack: +> ffff8800bb77ec00 0000000000000000 ffff8800bac3fd88 ffffffff811fbf85 +> ffff8800bac3fd98 ffff8800bb77f080 ffff8800ba713800 ffff8800bb262b40 +> 0000000000000000 0000000000000000 ffff8800bac3fdd8 ffffffff811f1da0 +> Call Trace: +> [] propagate_mnt+0x105/0x140 +> [] attach_recursive_mnt+0x120/0x1e0 +> [] graft_tree+0x63/0x70 +> [] do_add_mount+0x9b/0x100 +> [] do_mount+0x2aa/0xdf0 +> [] ? 
strndup_user+0x4e/0x70 +> [] SyS_mount+0x75/0xc0 +> [] do_syscall_64+0x4b/0xa0 +> [] entry_SYSCALL64_slow_path+0x25/0x25 +> Code: 00 00 75 ec 48 89 0d 02 22 22 01 8b 89 10 01 00 00 48 89 05 fd 21 22 01 39 8e 10 01 00 00 0f 84 e0 00 00 00 48 8b 80 d8 00 00 00 <48> 8b 50 10 48 89 05 df 21 22 01 48 89 15 d0 21 22 01 8b 53 30 +> RIP [] propagate_one+0xbe/0x1c0 +> RSP +> CR2: 0000000000000010 +> ---[ end trace 2725ecd95164f217 ]--- + +This oops happens with the namespace_sem held and can be triggered by +non-root users. An all around not pleasant experience. + +To avoid this scenario when finding the appropriate source mount to +copy, stop the walk up the mnt_master chain when the first source mount +is encountered. + +Further rewrite the walk up the last_source mnt_master chain so that +it is clear what is going on. + +The reason why the first source mount is special is that its +mnt_parent is not a mount in the dest_mnt propagation tree, and as +such termination conditions based upon the dest_mnt mount propagation +tree do not make sense. + +To avoid other kinds of confusion last_dest is not changed when +computing last_source. last_dest is only used once in propagate_one +and that is above the point of the code being modified, so changing +the global variable is meaningless and confusing. + +Fixes: f2ebb3a921c1ca1e2ddd9242e95a1989a50c4c68 ("smarter propagate_mnt()") +Reported-by: Tycho Andersen +Reviewed-by: Seth Forshee +Tested-by: Seth Forshee +Signed-off-by: "Eric W. 
Biederman" +Signed-off-by: Greg Kroah-Hartman + +--- + fs/pnode.c | 25 ++++++++++++++----------- + 1 file changed, 14 insertions(+), 11 deletions(-) + +--- a/fs/pnode.c ++++ b/fs/pnode.c +@@ -198,7 +198,7 @@ static struct mount *next_group(struct m + + /* all accesses are serialized by namespace_sem */ + static struct user_namespace *user_ns; +-static struct mount *last_dest, *last_source, *dest_master; ++static struct mount *last_dest, *first_source, *last_source, *dest_master; + static struct mountpoint *mp; + static struct hlist_head *list; + +@@ -221,20 +221,22 @@ static int propagate_one(struct mount *m + type = CL_MAKE_SHARED; + } else { + struct mount *n, *p; ++ bool done; + for (n = m; ; n = p) { + p = n->mnt_master; +- if (p == dest_master || IS_MNT_MARKED(p)) { +- while (last_dest->mnt_master != p) { +- last_source = last_source->mnt_master; +- last_dest = last_source->mnt_parent; +- } +- if (!peers(n, last_dest)) { +- last_source = last_source->mnt_master; +- last_dest = last_source->mnt_parent; +- } ++ if (p == dest_master || IS_MNT_MARKED(p)) + break; +- } + } ++ do { ++ struct mount *parent = last_source->mnt_parent; ++ if (last_source == first_source) ++ break; ++ done = parent->mnt_master == p; ++ if (done && peers(n, parent)) ++ break; ++ last_source = last_source->mnt_master; ++ } while (!done); ++ + type = CL_SLAVE; + /* beginning of peer group among the slaves? 
*/ + if (IS_MNT_SHARED(m)) +@@ -286,6 +288,7 @@ int propagate_mnt(struct mount *dest_mnt + */ + user_ns = current->nsproxy->mnt_ns->user_ns; + last_dest = dest_mnt; ++ first_source = source_mnt; + last_source = source_mnt; + mp = dest_mp; + list = tree_list; diff --git a/queue-4.4/series b/queue-4.4/series index 6e532088f22..7fe412150c7 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -37,3 +37,9 @@ batman-adv-fix-dat-candidate-selection-must-use-vid.patch batman-adv-check-skb-size-before-using-encapsulated-eth-vlan-header.patch batman-adv-fix-broadcast-ogm-queue-limit-on-a-removed-interface.patch batman-adv-reduce-refcnt-of-removed-router-when-updating-route.patch +writeback-fix-performance-regression-in-wb_over_bg_thresh.patch +maintainers-remove-asterisk-from-efi-directory-names.patch +x86-tsc-read-all-ratio-bits-from-msr_platform_info.patch +fs-pnode.c-treat-zero-mnt_group_id-s-as-unequal.patch +propogate_mnt-handle-the-first-propogated-copy-being-a-slave.patch +arm-cpuidle-pass-on-arm_cpuidle_suspend-s-return-value.patch diff --git a/queue-4.4/writeback-fix-performance-regression-in-wb_over_bg_thresh.patch b/queue-4.4/writeback-fix-performance-regression-in-wb_over_bg_thresh.patch new file mode 100644 index 00000000000..a3a39128c86 --- /dev/null +++ b/queue-4.4/writeback-fix-performance-regression-in-wb_over_bg_thresh.patch @@ -0,0 +1,77 @@ +From 74d369443325063a5f0260e63971decb950fd8fa Mon Sep 17 00:00:00 2001 +From: Howard Cochran +Date: Thu, 10 Mar 2016 01:12:39 -0500 +Subject: writeback: Fix performance regression in wb_over_bg_thresh() + +From: Howard Cochran + +commit 74d369443325063a5f0260e63971decb950fd8fa upstream. + +Commit 947e9762a8dd ("writeback: update wb_over_bg_thresh() to use +wb_domain aware operations") unintentionally changed this function's +meaning from "are there more dirty pages than the background writeback +threshold" to "are there more dirty pages than the writeback threshold". 
+The background writeback threshold is typically half of the writeback +threshold, so this had the effect of raising the number of dirty pages +required to cause a writeback worker to perform background writeout. + +This can cause a very severe performance regression when a BDI uses +BDI_CAP_STRICTLIMIT because balance_dirty_pages() and the writeback worker +can now disagree on whether writeback should be initiated. + +For example, in a system having 1GB of RAM, a single spinning disk, and a +"pass-through" FUSE filesystem mounted over the disk, application code +mmapped a 128MB file on the disk and was randomly dirtying pages in that +mapping. + +Because FUSE uses strictlimit and has a default max_ratio of only 1%, in +balance_dirty_pages, thresh is ~200, bg_thresh is ~100, and the +dirty_freerun_ceiling is the average of those, ~150. So, it pauses the +dirtying processes when we have 151 dirty pages and wakes up a background +writeback worker. But the worker tests the wrong threshold (200 instead of +100), so it does not initiate writeback and just returns. + +Thus, balance_dirty_pages keeps looping, sleeping and then waking up the +worker who will do nothing. It remains stuck in this state until the few +dirty pages that we have finally expire and we write them back for that +reason. Then the whole process repeats, resulting in near-zero throughput +through the FUSE BDI. + +The fix is to call the parameterized variant of wb_calc_thresh, so that the +worker will do writeback if the bg_thresh is exceeded which was the +behavior before the referenced commit. 
+ +Fixes: 947e9762a8dd ("writeback: update wb_over_bg_thresh() to use wb_domain aware operations") +Signed-off-by: Howard Cochran +Acked-by: Tejun Heo +Signed-off-by: Miklos Szeredi +Tested-by: Sedat Dilek +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman + +--- + mm/page-writeback.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/mm/page-writeback.c ++++ b/mm/page-writeback.c +@@ -1899,7 +1899,8 @@ bool wb_over_bg_thresh(struct bdi_writeb + if (gdtc->dirty > gdtc->bg_thresh) + return true; + +- if (wb_stat(wb, WB_RECLAIMABLE) > __wb_calc_thresh(gdtc)) ++ if (wb_stat(wb, WB_RECLAIMABLE) > ++ wb_calc_thresh(gdtc->wb, gdtc->bg_thresh)) + return true; + + if (mdtc) { +@@ -1913,7 +1914,8 @@ bool wb_over_bg_thresh(struct bdi_writeb + if (mdtc->dirty > mdtc->bg_thresh) + return true; + +- if (wb_stat(wb, WB_RECLAIMABLE) > __wb_calc_thresh(mdtc)) ++ if (wb_stat(wb, WB_RECLAIMABLE) > ++ wb_calc_thresh(mdtc->wb, mdtc->bg_thresh)) + return true; + } + diff --git a/queue-4.4/x86-tsc-read-all-ratio-bits-from-msr_platform_info.patch b/queue-4.4/x86-tsc-read-all-ratio-bits-from-msr_platform_info.patch new file mode 100644 index 00000000000..abb500fcbc0 --- /dev/null +++ b/queue-4.4/x86-tsc-read-all-ratio-bits-from-msr_platform_info.patch @@ -0,0 +1,45 @@ +From 886123fb3a8656699dff40afa0573df359abeb18 Mon Sep 17 00:00:00 2001 +From: Chen Yu +Date: Fri, 6 May 2016 11:33:39 +0800 +Subject: x86/tsc: Read all ratio bits from MSR_PLATFORM_INFO + +From: Chen Yu + +commit 886123fb3a8656699dff40afa0573df359abeb18 upstream. + +Currently we read the tsc ratio: ratio = (MSR_PLATFORM_INFO >> 8) & 0x1f; + +Thus we get bits 8-12 of MSR_PLATFORM_INFO, however according to the SDM +(35.5), the ratio bits are bits 8-15. + +Ignoring the upper bits can result in an incorrect tsc ratio, which causes the +TSC calibration and the Local APIC timer frequency to be incorrect. + +Fix this problem by masking 0xff instead. 
+ +[ tglx: Massaged changelog ] + +Fixes: 7da7c1561366 "x86, tsc: Add static (MSR) TSC calibration on Intel Atom SoCs" +Signed-off-by: Chen Yu +Cc: "Rafael J. Wysocki" +Cc: Bin Gao +Cc: Len Brown +Link: http://lkml.kernel.org/r/1462505619-5516-1-git-send-email-yu.c.chen@intel.com +Signed-off-by: Thomas Gleixner +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/tsc_msr.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kernel/tsc_msr.c ++++ b/arch/x86/kernel/tsc_msr.c +@@ -92,7 +92,7 @@ unsigned long try_msr_calibrate_tsc(void + + if (freq_desc_tables[cpu_index].msr_plat) { + rdmsr(MSR_PLATFORM_INFO, lo, hi); +- ratio = (lo >> 8) & 0x1f; ++ ratio = (lo >> 8) & 0xff; + } else { + rdmsr(MSR_IA32_PERF_STATUS, lo, hi); + ratio = (hi >> 8) & 0x1f;