From: Greg Kroah-Hartman Date: Mon, 29 Jan 2018 08:10:49 +0000 (+0100) Subject: 4.4-stable patches X-Git-Tag: v4.4.114~13 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=aaea4156dfe045e474e896472ec484260ec7e12a;p=thirdparty%2Fkernel%2Fstable-queue.git 4.4-stable patches added patches: hrtimer-reset-hrtimer-cpu-base-proper-on-cpu-hotplug.patch x86-microcode-intel-extend-bdw-late-loading-further-with-llc-size-check.patch --- diff --git a/queue-4.4/hrtimer-reset-hrtimer-cpu-base-proper-on-cpu-hotplug.patch b/queue-4.4/hrtimer-reset-hrtimer-cpu-base-proper-on-cpu-hotplug.patch new file mode 100644 index 00000000000..59913bb2633 --- /dev/null +++ b/queue-4.4/hrtimer-reset-hrtimer-cpu-base-proper-on-cpu-hotplug.patch @@ -0,0 +1,66 @@ +From d5421ea43d30701e03cadc56a38854c36a8b4433 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner +Date: Fri, 26 Jan 2018 14:54:32 +0100 +Subject: hrtimer: Reset hrtimer cpu base proper on CPU hotplug + +From: Thomas Gleixner + +commit d5421ea43d30701e03cadc56a38854c36a8b4433 upstream. + +The hrtimer interrupt code contains a hang detection and mitigation +mechanism, which prevents that a long delayed hrtimer interrupt causes a +continuous retriggering of interrupts which prevent the system from making +progress. If a hang is detected then the timer hardware is programmed with +a certain delay into the future and a flag is set in the hrtimer cpu base +which prevents newly enqueued timers from reprogramming the timer hardware +prior to the chosen delay. The subsequent hrtimer interrupt after the delay +clears the flag and resumes normal operation. + +If such a hang happens in the last hrtimer interrupt before a CPU is +unplugged then the hang_detected flag is set and stays that way when the +CPU is plugged in again. At that point the timer hardware is not armed and +it cannot be armed because the hang_detected flag is still active, so +nothing clears that flag. 
As a consequence the CPU does not receive hrtimer +interrupts and no timers expire on that CPU which results in RCU stalls and +other malfunctions. + +Clear the flag along with some other less critical members of the hrtimer +cpu base to ensure starting from a clean state when a CPU is plugged in. + +Thanks to Paul, Sebastian and Anna-Maria for their help to get down to the +root cause of that hard to reproduce heisenbug. Once understood it's +trivial and certainly justifies a brown paperbag. + +Fixes: 41d2e4949377 ("hrtimer: Tune hrtimer_interrupt hang logic") +Reported-by: Paul E. McKenney +Signed-off-by: Thomas Gleixner +Cc: Peter Zijlstra +Cc: Sebastian Sewior +Cc: Anna-Maria Gleixner +Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1801261447590.2067@nanos +Signed-off-by: Greg Kroah-Hartman + +--- + kernel/time/hrtimer.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -669,7 +669,9 @@ static void hrtimer_reprogram(struct hrt + static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) + { + base->expires_next.tv64 = KTIME_MAX; ++ base->hang_detected = 0; + base->hres_active = 0; ++ base->next_timer = NULL; + } + + /* +@@ -1615,6 +1617,7 @@ static void init_hrtimers_cpu(int cpu) + timerqueue_init_head(&cpu_base->clock_base[i].active); + } + ++ cpu_base->active_bases = 0; + cpu_base->cpu = cpu; + hrtimer_init_hres(cpu_base); + } diff --git a/queue-4.4/series b/queue-4.4/series index c9e296ccd68..87b802b78ce 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -53,3 +53,5 @@ usbip-prevent-leaking-socket-pointer-address-in-messages.patch um-link-vmlinux-with-no-pie.patch vsyscall-fix-permissions-for-emulate-mode-with-kaiser-pti.patch eventpoll.h-add-missing-epoll-event-masks.patch +x86-microcode-intel-extend-bdw-late-loading-further-with-llc-size-check.patch +hrtimer-reset-hrtimer-cpu-base-proper-on-cpu-hotplug.patch diff --git 
a/queue-4.4/x86-microcode-intel-extend-bdw-late-loading-further-with-llc-size-check.patch b/queue-4.4/x86-microcode-intel-extend-bdw-late-loading-further-with-llc-size-check.patch new file mode 100644 index 00000000000..9cac24f7fac --- /dev/null +++ b/queue-4.4/x86-microcode-intel-extend-bdw-late-loading-further-with-llc-size-check.patch @@ -0,0 +1,90 @@ +From 7e702d17ed138cf4ae7c00e8c00681ed464587c7 Mon Sep 17 00:00:00 2001 +From: Jia Zhang +Date: Tue, 23 Jan 2018 11:41:32 +0100 +Subject: x86/microcode/intel: Extend BDW late-loading further with LLC size check + +From: Jia Zhang + +commit 7e702d17ed138cf4ae7c00e8c00681ed464587c7 upstream. + +Commit b94b73733171 ("x86/microcode/intel: Extend BDW late-loading with a +revision check") reduced the impact of erratum BDF90 for Broadwell model +79. + +The impact can be reduced further by checking the size of the last level +cache portion per core. + +Tony: "The erratum says the problem only occurs on the large-cache SKUs. +So we only need to avoid the update if we are on a big cache SKU that is +also running old microcode." + +For more details, see erratum BDF90 in document #334165 (Intel Xeon +Processor E7-8800/4800 v4 Product Family Specification Update) from +September 2017. 
+ +Fixes: b94b73733171 ("x86/microcode/intel: Extend BDW late-loading with a revision check") +Signed-off-by: Jia Zhang +Signed-off-by: Borislav Petkov +Signed-off-by: Thomas Gleixner +Acked-by: Tony Luck +Link: https://lkml.kernel.org/r/1516321542-31161-1-git-send-email-zhang.jia@linux.alibaba.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/cpu/microcode/intel.c | 20 ++++++++++++++++++-- + 1 file changed, 18 insertions(+), 2 deletions(-) + +--- a/arch/x86/kernel/cpu/microcode/intel.c ++++ b/arch/x86/kernel/cpu/microcode/intel.c +@@ -39,6 +39,9 @@ + #include + #include + ++/* last level cache size per core */ ++static int llc_size_per_core; ++ + static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT]; + static struct mc_saved_data { + unsigned int mc_saved_count; +@@ -996,12 +999,14 @@ static bool is_blacklisted(unsigned int + + /* + * Late loading on model 79 with microcode revision less than 0x0b000021 +- * may result in a system hang. This behavior is documented in item +- * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family). ++ * and LLC size per core bigger than 2.5MB may result in a system hang. ++ * This behavior is documented in item BDF90, #334165 (Intel Xeon ++ * Processor E7-8800/4800 v4 Product Family). 
+ */ + if (c->x86 == 6 && + c->x86_model == 79 && + c->x86_mask == 0x01 && ++ llc_size_per_core > 2621440 && + c->microcode < 0x0b000021) { + pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode); + pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n"); +@@ -1068,6 +1073,15 @@ static struct microcode_ops microcode_in + .microcode_fini_cpu = microcode_fini_cpu, + }; + ++static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c) ++{ ++ u64 llc_size = c->x86_cache_size * 1024; ++ ++ do_div(llc_size, c->x86_max_cores); ++ ++ return (int)llc_size; ++} ++ + struct microcode_ops * __init init_intel_microcode(void) + { + struct cpuinfo_x86 *c = &boot_cpu_data; +@@ -1078,6 +1092,8 @@ struct microcode_ops * __init init_intel + return NULL; + } + ++ llc_size_per_core = calc_llc_size_per_core(c); ++ + return &microcode_intel_ops; + } +