From: Greg Kroah-Hartman Date: Mon, 29 Jan 2018 08:10:25 +0000 (+0100) Subject: 4.14-stable patches X-Git-Tag: v4.4.114~14 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8d85122c52d14afec113901502e71ef54df632cd;p=thirdparty%2Fkernel%2Fstable-queue.git 4.14-stable patches added patches: hrtimer-reset-hrtimer-cpu-base-proper-on-cpu-hotplug.patch perf-x86-amd-power-do-not-load-amd-power-module-on-amd-platforms.patch x86-microcode-fix-again-accessing-initrd-after-having-been-freed.patch x86-microcode-intel-extend-bdw-late-loading-further-with-llc-size-check.patch x86-mm-64-fix-vmapped-stack-syncing-on-very-large-memory-4-level-systems.patch --- diff --git a/queue-4.14/hrtimer-reset-hrtimer-cpu-base-proper-on-cpu-hotplug.patch b/queue-4.14/hrtimer-reset-hrtimer-cpu-base-proper-on-cpu-hotplug.patch new file mode 100644 index 00000000000..2696ba5e67b --- /dev/null +++ b/queue-4.14/hrtimer-reset-hrtimer-cpu-base-proper-on-cpu-hotplug.patch @@ -0,0 +1,66 @@ +From d5421ea43d30701e03cadc56a38854c36a8b4433 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Fri, 26 Jan 2018 14:54:32 +0100 +Subject: hrtimer: Reset hrtimer cpu base proper on CPU hotplug + +From: Thomas Gleixner + +commit d5421ea43d30701e03cadc56a38854c36a8b4433 upstream. + +The hrtimer interrupt code contains a hang detection and mitigation +mechanism, which prevents that a long delayed hrtimer interrupt causes a +continuous retriggering of interrupts which prevent the system from making +progress. If a hang is detected then the timer hardware is programmed with +a certain delay into the future and a flag is set in the hrtimer cpu base +which prevents newly enqueued timers from reprogramming the timer hardware +prior to the chosen delay. The subsequent hrtimer interrupt after the delay +clears the flag and resumes normal operation. + +If such a hang happens in the last hrtimer interrupt before a CPU is +unplugged then the hang_detected flag is set and stays that way when the +CPU is plugged in again.
At that point the timer hardware is not armed and +it cannot be armed because the hang_detected flag is still active, so +nothing clears that flag. As a consequence the CPU does not receive hrtimer +interrupts and no timers expire on that CPU which results in RCU stalls and +other malfunctions. + +Clear the flag along with some other less critical members of the hrtimer +cpu base to ensure starting from a clean state when a CPU is plugged in. + +Thanks to Paul, Sebastian and Anna-Maria for their help to get down to the +root cause of that hard to reproduce heisenbug. Once understood it's +trivial and certainly justifies a brown paperbag. + +Fixes: 41d2e4949377 ("hrtimer: Tune hrtimer_interrupt hang logic") +Reported-by: Paul E. McKenney +Signed-off-by: Thomas Gleixner +Cc: Peter Zijlstra +Cc: Sebastian Sewior +Cc: Anna-Maria Gleixner +Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801261447590.2067@nanos +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/time/hrtimer.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -655,7 +655,9 @@ static void hrtimer_reprogram(struct hrt + static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) + { + base->expires_next = KTIME_MAX; ++ base->hang_detected = 0; + base->hres_active = 0; ++ base->next_timer = NULL; + } + + /* +@@ -1591,6 +1593,7 @@ int hrtimers_prepare_cpu(unsigned int cp + timerqueue_init_head(&cpu_base->clock_base[i].active); + } + ++ cpu_base->active_bases = 0; + cpu_base->cpu = cpu; + hrtimer_init_hres(cpu_base); + return 0; diff --git a/queue-4.14/perf-x86-amd-power-do-not-load-amd-power-module-on-amd-platforms.patch b/queue-4.14/perf-x86-amd-power-do-not-load-amd-power-module-on-amd-platforms.patch new file mode 100644 index 00000000000..b6d9187faf1 --- /dev/null +++ b/queue-4.14/perf-x86-amd-power-do-not-load-amd-power-module-on-amd-platforms.patch @@ -0,0 +1,45 @@ +From 40d4071ce2d20840d224b4a77b5dc6f752c9ab15 Mon Sep 17 00:00:00 2001 
+From: Xiao Liang +Date: Mon, 22 Jan 2018 14:12:52 +0800 +Subject: perf/x86/amd/power: Do not load AMD power module on !AMD platforms + +From: Xiao Liang + +commit 40d4071ce2d20840d224b4a77b5dc6f752c9ab15 upstream. + +The AMD power module can be loaded on non AMD platforms, but unload fails +with the following Oops: + + BUG: unable to handle kernel NULL pointer dereference at (null) + IP: __list_del_entry_valid+0x29/0x90 + Call Trace: + perf_pmu_unregister+0x25/0xf0 + amd_power_pmu_exit+0x1c/0xd23 [power] + SyS_delete_module+0x1a8/0x2b0 + ? exit_to_usermode_loop+0x8f/0xb0 + entry_SYSCALL_64_fastpath+0x20/0x83 + +Return -ENODEV instead of 0 from the module init function if the CPU does +not match. + +Fixes: c7ab62bfbe0e ("perf/x86/amd/power: Add AMD accumulated power reporting mechanism") +Signed-off-by: Xiao Liang +Signed-off-by: Thomas Gleixner +Link: https://lkml.kernel.org/r/20180122061252.6394-1-xiliang@redhat.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/events/amd/power.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/events/amd/power.c ++++ b/arch/x86/events/amd/power.c +@@ -277,7 +277,7 @@ static int __init amd_power_pmu_init(voi + int ret; + + if (!x86_match_cpu(cpu_match)) +- return 0; ++ return -ENODEV; + + if (!boot_cpu_has(X86_FEATURE_ACC_POWER)) + return -ENODEV; diff --git a/queue-4.14/series b/queue-4.14/series index 194cce229b5..fce9bfcd0f5 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -57,3 +57,8 @@ tls-return-ebusy-if-crypto_info-is-already-set.patch tls-reset-crypto_info-when-do_tls_setsockopt_tx-fails.patch net-ipv4-make-ip-route-get-match-iif-lo-rules-again.patch vmxnet3-repair-memory-leak.patch +perf-x86-amd-power-do-not-load-amd-power-module-on-amd-platforms.patch +x86-microcode-intel-extend-bdw-late-loading-further-with-llc-size-check.patch +x86-microcode-fix-again-accessing-initrd-after-having-been-freed.patch +x86-mm-64-fix-vmapped-stack-syncing-on-very-large-memory-4-level-systems.patch 
+hrtimer-reset-hrtimer-cpu-base-proper-on-cpu-hotplug.patch diff --git a/queue-4.14/x86-microcode-fix-again-accessing-initrd-after-having-been-freed.patch b/queue-4.14/x86-microcode-fix-again-accessing-initrd-after-having-been-freed.patch new file mode 100644 index 00000000000..c0c1b27617b --- /dev/null +++ b/queue-4.14/x86-microcode-fix-again-accessing-initrd-after-having-been-freed.patch @@ -0,0 +1,65 @@ +From 1d080f096fe33f031d26e19b3ef0146f66b8b0f1 Mon Sep 17 00:00:00 2001 +From: Borislav Petkov +Date: Tue, 23 Jan 2018 11:41:33 +0100 +Subject: x86/microcode: Fix again accessing initrd after having been freed + +From: Borislav Petkov + +commit 1d080f096fe33f031d26e19b3ef0146f66b8b0f1 upstream. + +Commit 24c2503255d3 ("x86/microcode: Do not access the initrd after it has +been freed") fixed attempts to access initrd from the microcode loader +after it has been freed. However, a similar KASAN warning was reported +(stack trace edited): + + smpboot: Booting Node 0 Processor 1 APIC 0x11 + ================================================================== + BUG: KASAN: use-after-free in find_cpio_data+0x9b5/0xa50 + Read of size 1 at addr ffff880035ffd000 by task swapper/1/0 + + CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.14.8-slack #7 + Hardware name: System manufacturer System Product Name/A88X-PLUS, BIOS 3003 03/10/2016 + Call Trace: + dump_stack + print_address_description + kasan_report + ? find_cpio_data + __asan_report_load1_noabort + find_cpio_data + find_microcode_in_initrd + __load_ucode_amd + load_ucode_amd_ap + load_ucode_ap + +After some investigation, it turned out that a merge was done using the +wrong side to resolve, leading to picking up the previous state, before +the 24c2503255d3 fix. Therefore the Fixes tag below contains a merge +commit. + +Revert the mismerge by catching the save_microcode_in_initrd_amd() +retval and thus letting the function exit with the last return statement +so that initrd_gone can be set to true. 
+ +Fixes: f26483eaedec ("Merge branch 'x86/urgent' into x86/microcode, to resolve conflicts") +Reported-by: +Signed-off-by: Borislav Petkov +Signed-off-by: Thomas Gleixner +Link: https://bugzilla.kernel.org/show_bug.cgi?id=198295 +Link: https://lkml.kernel.org/r/20180123104133.918-2-bp@alien8.de +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/cpu/microcode/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kernel/cpu/microcode/core.c ++++ b/arch/x86/kernel/cpu/microcode/core.c +@@ -239,7 +239,7 @@ static int __init save_microcode_in_init + break; + case X86_VENDOR_AMD: + if (c->x86 >= 0x10) +- return save_microcode_in_initrd_amd(cpuid_eax(1)); ++ ret = save_microcode_in_initrd_amd(cpuid_eax(1)); + break; + default: + break; diff --git a/queue-4.14/x86-microcode-intel-extend-bdw-late-loading-further-with-llc-size-check.patch b/queue-4.14/x86-microcode-intel-extend-bdw-late-loading-further-with-llc-size-check.patch new file mode 100644 index 00000000000..61198dfdaf0 --- /dev/null +++ b/queue-4.14/x86-microcode-intel-extend-bdw-late-loading-further-with-llc-size-check.patch @@ -0,0 +1,89 @@ +From 7e702d17ed138cf4ae7c00e8c00681ed464587c7 Mon Sep 17 00:00:00 2001 +From: Jia Zhang +Date: Tue, 23 Jan 2018 11:41:32 +0100 +Subject: x86/microcode/intel: Extend BDW late-loading further with LLC size check + +From: Jia Zhang + +commit 7e702d17ed138cf4ae7c00e8c00681ed464587c7 upstream. + +Commit b94b73733171 ("x86/microcode/intel: Extend BDW late-loading with a +revision check") reduced the impact of erratum BDF90 for Broadwell model +79. + +The impact can be reduced further by checking the size of the last level +cache portion per core. + +Tony: "The erratum says the problem only occurs on the large-cache SKUs. +So we only need to avoid the update if we are on a big cache SKU that is +also running old microcode." 
+ +For more details, see erratum BDF90 in document #334165 (Intel Xeon +Processor E7-8800/4800 v4 Product Family Specification Update) from +September 2017. + +Fixes: b94b73733171 ("x86/microcode/intel: Extend BDW late-loading with a revision check") +Signed-off-by: Jia Zhang +Signed-off-by: Borislav Petkov +Signed-off-by: Thomas Gleixner +Acked-by: Tony Luck +Link: https://lkml.kernel.org/r/1516321542-31161-1-git-send-email-zhang.jia@linux.alibaba.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/cpu/microcode/intel.c | 20 ++++++++++++++++++-- + 1 file changed, 18 insertions(+), 2 deletions(-) + +--- a/arch/x86/kernel/cpu/microcode/intel.c ++++ b/arch/x86/kernel/cpu/microcode/intel.c +@@ -45,6 +45,9 @@ static const char ucode_path[] = "kernel + /* Current microcode patch used in early patching on the APs. */ + static struct microcode_intel *intel_ucode_patch; + ++/* last level cache size per core */ ++static int llc_size_per_core; ++ + static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1, + unsigned int s2, unsigned int p2) + { +@@ -912,12 +915,14 @@ static bool is_blacklisted(unsigned int + + /* + * Late loading on model 79 with microcode revision less than 0x0b000021 +- * may result in a system hang. This behavior is documented in item +- * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family). ++ * and LLC size per core bigger than 2.5MB may result in a system hang. ++ * This behavior is documented in item BDF90, #334165 (Intel Xeon ++ * Processor E7-8800/4800 v4 Product Family). 
+ */ + if (c->x86 == 6 && + c->x86_model == INTEL_FAM6_BROADWELL_X && + c->x86_mask == 0x01 && ++ llc_size_per_core > 2621440 && + c->microcode < 0x0b000021) { + pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode); + pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n"); +@@ -975,6 +980,15 @@ static struct microcode_ops microcode_in + .apply_microcode = apply_microcode_intel, + }; + ++static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c) ++{ ++ u64 llc_size = c->x86_cache_size * 1024; ++ ++ do_div(llc_size, c->x86_max_cores); ++ ++ return (int)llc_size; ++} ++ + struct microcode_ops * __init init_intel_microcode(void) + { + struct cpuinfo_x86 *c = &boot_cpu_data; +@@ -985,5 +999,7 @@ struct microcode_ops * __init init_intel + return NULL; + } + ++ llc_size_per_core = calc_llc_size_per_core(c); ++ + return &microcode_intel_ops; + } diff --git a/queue-4.14/x86-mm-64-fix-vmapped-stack-syncing-on-very-large-memory-4-level-systems.patch b/queue-4.14/x86-mm-64-fix-vmapped-stack-syncing-on-very-large-memory-4-level-systems.patch new file mode 100644 index 00000000000..f8a15587e49 --- /dev/null +++ b/queue-4.14/x86-mm-64-fix-vmapped-stack-syncing-on-very-large-memory-4-level-systems.patch @@ -0,0 +1,92 @@ +From 5beda7d54eafece4c974cfa9fbb9f60fb18fd20a Mon Sep 17 00:00:00 2001 +From: Andy Lutomirski +Date: Thu, 25 Jan 2018 13:12:14 -0800 +Subject: x86/mm/64: Fix vmapped stack syncing on very-large-memory 4-level systems + +From: Andy Lutomirski + +commit 5beda7d54eafece4c974cfa9fbb9f60fb18fd20a upstream. + +Neil Berrington reported a double-fault on a VM with 768GB of RAM that uses +large amounts of vmalloc space with PTI enabled. + +The cause is that load_new_mm_cr3() was never fixed to take the 5-level pgd +folding code into account, so, on a 4-level kernel, the pgd synchronization +logic compiles away to exactly nothing.
+ +Interestingly, the problem doesn't trigger with nopti. I assume this is +because the kernel is mapped with global pages if we boot with nopti. The +sequence of operations when we create a new task is that we first load its +mm while still running on the old stack (which crashes if the old stack is +unmapped in the new mm unless the TLB saves us), then we call +prepare_switch_to(), and then we switch to the new stack. +prepare_switch_to() pokes the new stack directly, which will populate the +mapping through vmalloc_fault(). I assume that we're getting lucky on +non-PTI systems -- the old stack's TLB entry stays alive long enough to +make it all the way through prepare_switch_to() and switch_to() so that we +make it to a valid stack. + +Fixes: b50858ce3e2a ("x86/mm/vmalloc: Add 5-level paging support") +Reported-and-tested-by: Neil Berrington +Signed-off-by: Andy Lutomirski +Signed-off-by: Thomas Gleixner +Cc: Konstantin Khlebnikov +Cc: Dave Hansen +Cc: Borislav Petkov +Link: https://lkml.kernel.org/r/346541c56caed61abbe693d7d2742b4a380c5001.1516914529.git.luto@kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/mm/tlb.c | 34 +++++++++++++++++++++++++++++----- + 1 file changed, 29 insertions(+), 5 deletions(-) + +--- a/arch/x86/mm/tlb.c ++++ b/arch/x86/mm/tlb.c +@@ -151,6 +151,34 @@ void switch_mm(struct mm_struct *prev, s + local_irq_restore(flags); + } + ++static void sync_current_stack_to_mm(struct mm_struct *mm) ++{ ++ unsigned long sp = current_stack_pointer; ++ pgd_t *pgd = pgd_offset(mm, sp); ++ ++ if (CONFIG_PGTABLE_LEVELS > 4) { ++ if (unlikely(pgd_none(*pgd))) { ++ pgd_t *pgd_ref = pgd_offset_k(sp); ++ ++ set_pgd(pgd, *pgd_ref); ++ } ++ } else { ++ /* ++ * "pgd" is faked. The top level entries are "p4d"s, so sync ++ * the p4d. This compiles to approximately the same code as ++ * the 5-level case. 
++ */ ++ p4d_t *p4d = p4d_offset(pgd, sp); ++ ++ if (unlikely(p4d_none(*p4d))) { ++ pgd_t *pgd_ref = pgd_offset_k(sp); ++ p4d_t *p4d_ref = p4d_offset(pgd_ref, sp); ++ ++ set_p4d(p4d, *p4d_ref); ++ } ++ } ++} ++ + void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) + { +@@ -226,11 +254,7 @@ void switch_mm_irqs_off(struct mm_struct + * mapped in the new pgd, we'll double-fault. Forcibly + * map it. + */ +- unsigned int index = pgd_index(current_stack_pointer); +- pgd_t *pgd = next->pgd + index; +- +- if (unlikely(pgd_none(*pgd))) +- set_pgd(pgd, init_mm.pgd[index]); ++ sync_current_stack_to_mm(next); + } + + /* Stop remote flushes for the previous mm */