From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 29 Jan 2018 08:10:25 +0000 (+0100)
Subject: 4.14-stable patches
X-Git-Tag: v4.4.114~14
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8d85122c52d14afec113901502e71ef54df632cd;p=thirdparty%2Fkernel%2Fstable-queue.git

4.14-stable patches

added patches:
	hrtimer-reset-hrtimer-cpu-base-proper-on-cpu-hotplug.patch
	perf-x86-amd-power-do-not-load-amd-power-module-on-amd-platforms.patch
	x86-microcode-fix-again-accessing-initrd-after-having-been-freed.patch
	x86-microcode-intel-extend-bdw-late-loading-further-with-llc-size-check.patch
	x86-mm-64-fix-vmapped-stack-syncing-on-very-large-memory-4-level-systems.patch
---

diff --git a/queue-4.14/hrtimer-reset-hrtimer-cpu-base-proper-on-cpu-hotplug.patch b/queue-4.14/hrtimer-reset-hrtimer-cpu-base-proper-on-cpu-hotplug.patch
new file mode 100644
index 00000000000..2696ba5e67b
--- /dev/null
+++ b/queue-4.14/hrtimer-reset-hrtimer-cpu-base-proper-on-cpu-hotplug.patch
@@ -0,0 +1,66 @@
+From d5421ea43d30701e03cadc56a38854c36a8b4433 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 26 Jan 2018 14:54:32 +0100
+Subject: hrtimer: Reset hrtimer cpu base proper on CPU hotplug
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit d5421ea43d30701e03cadc56a38854c36a8b4433 upstream.
+
+The hrtimer interrupt code contains a hang detection and mitigation
+mechanism, which prevents a long delayed hrtimer interrupt from causing a
+continuous retriggering of interrupts which prevents the system from making
+progress. If a hang is detected then the timer hardware is programmed with
+a certain delay into the future and a flag is set in the hrtimer cpu base
+which prevents newly enqueued timers from reprogramming the timer hardware
+prior to the chosen delay. The subsequent hrtimer interrupt after the delay
+clears the flag and resumes normal operation.
+
+If such a hang happens in the last hrtimer interrupt before a CPU is
+unplugged then the hang_detected flag is set and stays that way when the
+CPU is plugged in again. At that point the timer hardware is not armed and
+it cannot be armed because the hang_detected flag is still active, so
+nothing clears that flag. As a consequence the CPU does not receive hrtimer
+interrupts and no timers expire on that CPU which results in RCU stalls and
+other malfunctions.
+
+Clear the flag along with some other less critical members of the hrtimer
+cpu base to ensure starting from a clean state when a CPU is plugged in.
+
+Thanks to Paul, Sebastian and Anna-Maria for their help to get down to the
+root cause of that hard to reproduce heisenbug. Once understood it's
+trivial and certainly justifies a brown paperbag.
+
+Fixes: 41d2e4949377 ("hrtimer: Tune hrtimer_interrupt hang logic")
+Reported-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Sebastian Sewior <bigeasy@linutronix.de>
+Cc: Anna-Maria Gleixner <anna-maria@linutronix.de>
+Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801261447590.2067@nanos
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ kernel/time/hrtimer.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/kernel/time/hrtimer.c
++++ b/kernel/time/hrtimer.c
+@@ -655,7 +655,9 @@ static void hrtimer_reprogram(struct hrt
+ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
+ {
+ 	base->expires_next = KTIME_MAX;
++	base->hang_detected = 0;
+ 	base->hres_active = 0;
++	base->next_timer = NULL;
+ }
+ 
+ /*
+@@ -1591,6 +1593,7 @@ int hrtimers_prepare_cpu(unsigned int cp
+ 		timerqueue_init_head(&cpu_base->clock_base[i].active);
+ 	}
+ 
++	cpu_base->active_bases = 0;
+ 	cpu_base->cpu = cpu;
+ 	hrtimer_init_hres(cpu_base);
+ 	return 0;
diff --git a/queue-4.14/perf-x86-amd-power-do-not-load-amd-power-module-on-amd-platforms.patch b/queue-4.14/perf-x86-amd-power-do-not-load-amd-power-module-on-amd-platforms.patch
new file mode 100644
index 00000000000..b6d9187faf1
--- /dev/null
+++ b/queue-4.14/perf-x86-amd-power-do-not-load-amd-power-module-on-amd-platforms.patch
@@ -0,0 +1,45 @@
+From 40d4071ce2d20840d224b4a77b5dc6f752c9ab15 Mon Sep 17 00:00:00 2001
+From: Xiao Liang <xiliang@redhat.com>
+Date: Mon, 22 Jan 2018 14:12:52 +0800
+Subject: perf/x86/amd/power: Do not load AMD power module on !AMD platforms
+
+From: Xiao Liang <xiliang@redhat.com>
+
+commit 40d4071ce2d20840d224b4a77b5dc6f752c9ab15 upstream.
+
+The AMD power module can be loaded on non AMD platforms, but unload fails
+with the following Oops:
+
+ BUG: unable to handle kernel NULL pointer dereference at           (null)
+ IP: __list_del_entry_valid+0x29/0x90
+ Call Trace:
+  perf_pmu_unregister+0x25/0xf0
+  amd_power_pmu_exit+0x1c/0xd23 [power]
+  SyS_delete_module+0x1a8/0x2b0
+  ? exit_to_usermode_loop+0x8f/0xb0
+  entry_SYSCALL_64_fastpath+0x20/0x83
+
+Return -ENODEV instead of 0 from the module init function if the CPU does
+not match.
+
+Fixes: c7ab62bfbe0e ("perf/x86/amd/power: Add AMD accumulated power reporting mechanism")
+Signed-off-by: Xiao Liang <xiliang@redhat.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://lkml.kernel.org/r/20180122061252.6394-1-xiliang@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/events/amd/power.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/events/amd/power.c
++++ b/arch/x86/events/amd/power.c
+@@ -277,7 +277,7 @@ static int __init amd_power_pmu_init(voi
+ 	int ret;
+ 
+ 	if (!x86_match_cpu(cpu_match))
+-		return 0;
++		return -ENODEV;
+ 
+ 	if (!boot_cpu_has(X86_FEATURE_ACC_POWER))
+ 		return -ENODEV;
diff --git a/queue-4.14/series b/queue-4.14/series
index 194cce229b5..fce9bfcd0f5 100644
--- a/queue-4.14/series
+++ b/queue-4.14/series
@@ -57,3 +57,8 @@ tls-return-ebusy-if-crypto_info-is-already-set.patch
 tls-reset-crypto_info-when-do_tls_setsockopt_tx-fails.patch
 net-ipv4-make-ip-route-get-match-iif-lo-rules-again.patch
 vmxnet3-repair-memory-leak.patch
+perf-x86-amd-power-do-not-load-amd-power-module-on-amd-platforms.patch
+x86-microcode-intel-extend-bdw-late-loading-further-with-llc-size-check.patch
+x86-microcode-fix-again-accessing-initrd-after-having-been-freed.patch
+x86-mm-64-fix-vmapped-stack-syncing-on-very-large-memory-4-level-systems.patch
+hrtimer-reset-hrtimer-cpu-base-proper-on-cpu-hotplug.patch
diff --git a/queue-4.14/x86-microcode-fix-again-accessing-initrd-after-having-been-freed.patch b/queue-4.14/x86-microcode-fix-again-accessing-initrd-after-having-been-freed.patch
new file mode 100644
index 00000000000..c0c1b27617b
--- /dev/null
+++ b/queue-4.14/x86-microcode-fix-again-accessing-initrd-after-having-been-freed.patch
@@ -0,0 +1,65 @@
+From 1d080f096fe33f031d26e19b3ef0146f66b8b0f1 Mon Sep 17 00:00:00 2001
+From: Borislav Petkov <bp@suse.de>
+Date: Tue, 23 Jan 2018 11:41:33 +0100
+Subject: x86/microcode: Fix again accessing initrd after having been freed
+
+From: Borislav Petkov <bp@suse.de>
+
+commit 1d080f096fe33f031d26e19b3ef0146f66b8b0f1 upstream.
+
+Commit 24c2503255d3 ("x86/microcode: Do not access the initrd after it has
+been freed") fixed attempts to access initrd from the microcode loader
+after it has been freed. However, a similar KASAN warning was reported
+(stack trace edited):
+
+  smpboot: Booting Node 0 Processor 1 APIC 0x11
+  ==================================================================
+  BUG: KASAN: use-after-free in find_cpio_data+0x9b5/0xa50
+  Read of size 1 at addr ffff880035ffd000 by task swapper/1/0
+
+  CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.14.8-slack #7
+  Hardware name: System manufacturer System Product Name/A88X-PLUS, BIOS 3003 03/10/2016
+  Call Trace:
+   dump_stack
+   print_address_description
+   kasan_report
+   ? find_cpio_data
+   __asan_report_load1_noabort
+   find_cpio_data
+   find_microcode_in_initrd
+   __load_ucode_amd
+   load_ucode_amd_ap
+      load_ucode_ap
+
+After some investigation, it turned out that a merge was done using the
+wrong side to resolve, leading to picking up the previous state, before
+the 24c2503255d3 fix. Therefore the Fixes tag below contains a merge
+commit.
+
+Revert the mismerge by catching the save_microcode_in_initrd_amd()
+retval and thus letting the function exit with the last return statement
+so that initrd_gone can be set to true.
+
+Fixes: f26483eaedec ("Merge branch 'x86/urgent' into x86/microcode, to resolve conflicts")
+Reported-by: <higuita@gmx.net>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=198295
+Link: https://lkml.kernel.org/r/20180123104133.918-2-bp@alien8.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/microcode/core.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/microcode/core.c
++++ b/arch/x86/kernel/cpu/microcode/core.c
+@@ -239,7 +239,7 @@ static int __init save_microcode_in_init
+ 		break;
+ 	case X86_VENDOR_AMD:
+ 		if (c->x86 >= 0x10)
+-			return save_microcode_in_initrd_amd(cpuid_eax(1));
++			ret = save_microcode_in_initrd_amd(cpuid_eax(1));
+ 		break;
+ 	default:
+ 		break;
diff --git a/queue-4.14/x86-microcode-intel-extend-bdw-late-loading-further-with-llc-size-check.patch b/queue-4.14/x86-microcode-intel-extend-bdw-late-loading-further-with-llc-size-check.patch
new file mode 100644
index 00000000000..61198dfdaf0
--- /dev/null
+++ b/queue-4.14/x86-microcode-intel-extend-bdw-late-loading-further-with-llc-size-check.patch
@@ -0,0 +1,89 @@
+From 7e702d17ed138cf4ae7c00e8c00681ed464587c7 Mon Sep 17 00:00:00 2001
+From: Jia Zhang <zhang.jia@linux.alibaba.com>
+Date: Tue, 23 Jan 2018 11:41:32 +0100
+Subject: x86/microcode/intel: Extend BDW late-loading further with LLC size check
+
+From: Jia Zhang <zhang.jia@linux.alibaba.com>
+
+commit 7e702d17ed138cf4ae7c00e8c00681ed464587c7 upstream.
+
+Commit b94b73733171 ("x86/microcode/intel: Extend BDW late-loading with a
+revision check") reduced the impact of erratum BDF90 for Broadwell model
+79.
+
+The impact can be reduced further by checking the size of the last level
+cache portion per core.
+
+Tony: "The erratum says the problem only occurs on the large-cache SKUs.
+So we only need to avoid the update if we are on a big cache SKU that is
+also running old microcode."
+
+For more details, see erratum BDF90 in document #334165 (Intel Xeon
+Processor E7-8800/4800 v4 Product Family Specification Update) from
+September 2017.
+
+Fixes: b94b73733171 ("x86/microcode/intel: Extend BDW late-loading with a revision check")
+Signed-off-by: Jia Zhang <zhang.jia@linux.alibaba.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Tony Luck <tony.luck@intel.com>
+Link: https://lkml.kernel.org/r/1516321542-31161-1-git-send-email-zhang.jia@linux.alibaba.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/kernel/cpu/microcode/intel.c |   20 ++++++++++++++++++--
+ 1 file changed, 18 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/kernel/cpu/microcode/intel.c
++++ b/arch/x86/kernel/cpu/microcode/intel.c
+@@ -45,6 +45,9 @@ static const char ucode_path[] = "kernel
+ /* Current microcode patch used in early patching on the APs. */
+ static struct microcode_intel *intel_ucode_patch;
+ 
++/* last level cache size per core */
++static int llc_size_per_core;
++
+ static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1,
+ 					unsigned int s2, unsigned int p2)
+ {
+@@ -912,12 +915,14 @@ static bool is_blacklisted(unsigned int
+ 
+ 	/*
+ 	 * Late loading on model 79 with microcode revision less than 0x0b000021
+-	 * may result in a system hang. This behavior is documented in item
+-	 * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
++	 * and LLC size per core bigger than 2.5MB may result in a system hang.
++	 * This behavior is documented in item BDF90, #334165 (Intel Xeon
++	 * Processor E7-8800/4800 v4 Product Family).
+ 	 */
+ 	if (c->x86 == 6 &&
+ 	    c->x86_model == INTEL_FAM6_BROADWELL_X &&
+ 	    c->x86_mask == 0x01 &&
++	    llc_size_per_core > 2621440 &&
+ 	    c->microcode < 0x0b000021) {
+ 		pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
+ 		pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
+@@ -975,6 +980,15 @@ static struct microcode_ops microcode_in
+ 	.apply_microcode                  = apply_microcode_intel,
+ };
+ 
++static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c)
++{
++	u64 llc_size = c->x86_cache_size * 1024;
++
++	do_div(llc_size, c->x86_max_cores);
++
++	return (int)llc_size;
++}
++
+ struct microcode_ops * __init init_intel_microcode(void)
+ {
+ 	struct cpuinfo_x86 *c = &boot_cpu_data;
+@@ -985,5 +999,7 @@ struct microcode_ops * __init init_intel
+ 		return NULL;
+ 	}
+ 
++	llc_size_per_core = calc_llc_size_per_core(c);
++
+ 	return &microcode_intel_ops;
+ }
diff --git a/queue-4.14/x86-mm-64-fix-vmapped-stack-syncing-on-very-large-memory-4-level-systems.patch b/queue-4.14/x86-mm-64-fix-vmapped-stack-syncing-on-very-large-memory-4-level-systems.patch
new file mode 100644
index 00000000000..f8a15587e49
--- /dev/null
+++ b/queue-4.14/x86-mm-64-fix-vmapped-stack-syncing-on-very-large-memory-4-level-systems.patch
@@ -0,0 +1,92 @@
+From 5beda7d54eafece4c974cfa9fbb9f60fb18fd20a Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski <luto@kernel.org>
+Date: Thu, 25 Jan 2018 13:12:14 -0800
+Subject: x86/mm/64: Fix vmapped stack syncing on very-large-memory 4-level systems
+
+From: Andy Lutomirski <luto@kernel.org>
+
+commit 5beda7d54eafece4c974cfa9fbb9f60fb18fd20a upstream.
+
+Neil Berrington reported a double-fault on a VM with 768GB of RAM that uses
+large amounts of vmalloc space with PTI enabled.
+
+The cause is that load_new_mm_cr3() was never fixed to take the 5-level pgd
+folding code into account, so, on a 4-level kernel, the pgd synchronization
+logic compiles away to exactly nothing.
+
+Interestingly, the problem doesn't trigger with nopti.  I assume this is
+because the kernel is mapped with global pages if we boot with nopti.  The
+sequence of operations when we create a new task is that we first load its
+mm while still running on the old stack (which crashes if the old stack is
+unmapped in the new mm unless the TLB saves us), then we call
+prepare_switch_to(), and then we switch to the new stack.
+prepare_switch_to() pokes the new stack directly, which will populate the
+mapping through vmalloc_fault().  I assume that we're getting lucky on
+non-PTI systems -- the old stack's TLB entry stays alive long enough to
+make it all the way through prepare_switch_to() and switch_to() so that we
+make it to a valid stack.
+
+Fixes: b50858ce3e2a ("x86/mm/vmalloc: Add 5-level paging support")
+Reported-and-tested-by: Neil Berrington <neil.berrington@datacore.com>
+Signed-off-by: Andy Lutomirski <luto@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Cc: Borislav Petkov <bp@alien8.de>
+Link: https://lkml.kernel.org/r/346541c56caed61abbe693d7d2742b4a380c5001.1516914529.git.luto@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/x86/mm/tlb.c |   34 +++++++++++++++++++++++++++++-----
+ 1 file changed, 29 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -151,6 +151,34 @@ void switch_mm(struct mm_struct *prev, s
+ 	local_irq_restore(flags);
+ }
+ 
++static void sync_current_stack_to_mm(struct mm_struct *mm)
++{
++	unsigned long sp = current_stack_pointer;
++	pgd_t *pgd = pgd_offset(mm, sp);
++
++	if (CONFIG_PGTABLE_LEVELS > 4) {
++		if (unlikely(pgd_none(*pgd))) {
++			pgd_t *pgd_ref = pgd_offset_k(sp);
++
++			set_pgd(pgd, *pgd_ref);
++		}
++	} else {
++		/*
++		 * "pgd" is faked.  The top level entries are "p4d"s, so sync
++		 * the p4d.  This compiles to approximately the same code as
++		 * the 5-level case.
++		 */
++		p4d_t *p4d = p4d_offset(pgd, sp);
++
++		if (unlikely(p4d_none(*p4d))) {
++			pgd_t *pgd_ref = pgd_offset_k(sp);
++			p4d_t *p4d_ref = p4d_offset(pgd_ref, sp);
++
++			set_p4d(p4d, *p4d_ref);
++		}
++	}
++}
++
+ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ 			struct task_struct *tsk)
+ {
+@@ -226,11 +254,7 @@ void switch_mm_irqs_off(struct mm_struct
+ 			 * mapped in the new pgd, we'll double-fault.  Forcibly
+ 			 * map it.
+ 			 */
+-			unsigned int index = pgd_index(current_stack_pointer);
+-			pgd_t *pgd = next->pgd + index;
+-
+-			if (unlikely(pgd_none(*pgd)))
+-				set_pgd(pgd, init_mm.pgd[index]);
++			sync_current_stack_to_mm(next);
+ 		}
+ 
+ 		/* Stop remote flushes for the previous mm */