From: Greg Kroah-Hartman
Date: Mon, 1 Apr 2019 16:07:05 +0000 (+0200)
Subject: 4.9-stable patches
X-Git-Tag: v3.18.138~2
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=018f3075ccd6c13c405ca76916f2d9b80086373f;p=thirdparty%2Fkernel%2Fstable-queue.git

4.9-stable patches

added patches:
	x86-asm-rewrite-sync_core-to-use-iret-to-self.patch
---

diff --git a/queue-4.9/series b/queue-4.9/series
index 9a6d5d8626c..8e6bffcabe6 100644
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -54,3 +54,4 @@ usb-gadget-f_hid-fix-deadlock-in-f_hidg_write.patch
 xhci-fix-port-resume-done-detection-for-ss-ports-with-lpm-enabled.patch
 revert-usb-core-only-clean-up-what-we-allocated.patch
 arm64-support-keyctl-system-call-in-32-bit-mode.patch
+x86-asm-rewrite-sync_core-to-use-iret-to-self.patch
diff --git a/queue-4.9/x86-asm-rewrite-sync_core-to-use-iret-to-self.patch b/queue-4.9/x86-asm-rewrite-sync_core-to-use-iret-to-self.patch
new file mode 100644
index 00000000000..3127d4bc679
--- /dev/null
+++ b/queue-4.9/x86-asm-rewrite-sync_core-to-use-iret-to-self.patch
@@ -0,0 +1,134 @@
+From c198b121b1a1d7a7171770c634cd49191bac4477 Mon Sep 17 00:00:00 2001
+From: Andy Lutomirski
+Date: Fri, 9 Dec 2016 10:24:08 -0800
+Subject: x86/asm: Rewrite sync_core() to use IRET-to-self
+
+From: Andy Lutomirski
+
+commit c198b121b1a1d7a7171770c634cd49191bac4477 upstream.
+
+Aside from being excessively slow, CPUID is problematic: Linux runs
+on a handful of CPUs that don't have CPUID.  Use IRET-to-self
+instead.  IRET-to-self works everywhere, so it makes testing easy.
+
+For reference, On my laptop, IRET-to-self is ~110ns,
+CPUID(eax=1, ecx=0) is ~83ns on native and very very slow under KVM,
+and MOV-to-CR2 is ~42ns.
+
+While we're at it: sync_core() serves a very specific purpose.
+Document it.
+
+Signed-off-by: Andy Lutomirski
+Cc: Juergen Gross
+Cc: One Thousand Gnomes
+Cc: Peter Zijlstra
+Cc: Brian Gerst
+Cc: Matthew Whitehead
+Cc: Borislav Petkov
+Cc: Henrique de Moraes Holschuh
+Cc: Andrew Cooper
+Cc: Boris Ostrovsky
+Cc: xen-devel
+Link: http://lkml.kernel.org/r/5c79f0225f68bc8c40335612bf624511abb78941.1481307769.git.luto@kernel.org
+Signed-off-by: Thomas Gleixner
+Cc: Evalds Iodzevics
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/include/asm/processor.h | 80 ++++++++++++++++++++++++++++-----------
+ 1 file changed, 58 insertions(+), 22 deletions(-)
+
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -596,33 +596,69 @@ static __always_inline void cpu_relax(vo
+ 
+ #define cpu_relax_lowlatency() cpu_relax()
+ 
+-/* Stop speculative execution and prefetching of modified code. */
++/*
++ * This function forces the icache and prefetched instruction stream to
++ * catch up with reality in two very specific cases:
++ *
++ *  a) Text was modified using one virtual address and is about to be executed
++ *     from the same physical page at a different virtual address.
++ *
++ *  b) Text was modified on a different CPU, may subsequently be
++ *     executed on this CPU, and you want to make sure the new version
++ *     gets executed.  This generally means you're calling this in a IPI.
++ *
++ * If you're calling this for a different reason, you're probably doing
++ * it wrong.
++ */
+ static inline void sync_core(void)
+ {
+-	int tmp;
+-
+-#ifdef CONFIG_X86_32
+ 	/*
+-	 * Do a CPUID if available, otherwise do a jump.  The jump
+-	 * can conveniently enough be the jump around CPUID.
++	 * There are quite a few ways to do this.  IRET-to-self is nice
++	 * because it works on every CPU, at any CPL (so it's compatible
++	 * with paravirtualization), and it never exits to a hypervisor.
++	 * The only down sides are that it's a bit slow (it seems to be
++	 * a bit more than 2x slower than the fastest options) and that
++	 * it unmasks NMIs.  The "push %cs" is needed because, in
++	 * paravirtual environments, __KERNEL_CS may not be a valid CS
++	 * value when we do IRET directly.
++	 *
++	 * In case NMI unmasking or performance ever becomes a problem,
++	 * the next best option appears to be MOV-to-CR2 and an
++	 * unconditional jump.  That sequence also works on all CPUs,
++	 * but it will fault at CPL3 (i.e. Xen PV and lguest).
++	 *
++	 * CPUID is the conventional way, but it's nasty: it doesn't
++	 * exist on some 486-like CPUs, and it usually exits to a
++	 * hypervisor.
++	 *
++	 * Like all of Linux's memory ordering operations, this is a
++	 * compiler barrier as well.
+ 	 */
+-	asm volatile("cmpl %2,%1\n\t"
+-		     "jl 1f\n\t"
+-		     "cpuid\n"
+-		     "1:"
+-		     : "=a" (tmp)
+-		     : "rm" (boot_cpu_data.cpuid_level), "ri" (0), "0" (1)
+-		     : "ebx", "ecx", "edx", "memory");
++	register void *__sp asm(_ASM_SP);
++
++#ifdef CONFIG_X86_32
++	asm volatile (
++		"pushfl\n\t"
++		"pushl %%cs\n\t"
++		"pushl $1f\n\t"
++		"iret\n\t"
++		"1:"
++		: "+r" (__sp) : : "memory");
+ #else
+-	/*
+-	 * CPUID is a barrier to speculative execution.
+-	 * Prefetched instructions are automatically
+-	 * invalidated when modified.
+-	 */
+-	asm volatile("cpuid"
+-		     : "=a" (tmp)
+-		     : "0" (1)
+-		     : "ebx", "ecx", "edx", "memory");
++	unsigned int tmp;
++
++	asm volatile (
++		"mov %%ss, %0\n\t"
++		"pushq %q0\n\t"
++		"pushq %%rsp\n\t"
++		"addq $8, (%%rsp)\n\t"
++		"pushfq\n\t"
++		"mov %%cs, %0\n\t"
++		"pushq %q0\n\t"
++		"pushq $1f\n\t"
++		"iretq\n\t"
++		"1:"
++		: "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
+ #endif
+ }
+ 
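
Illustration only, not part of the patch above: a minimal, hypothetical userspace sketch of the 64-bit IRET-to-self frame the new sync_core() builds (SS, old RSP, RFLAGS, CS, return RIP, then IRETQ). It assumes gcc or clang on x86-64 Linux, needs -mno-red-zone because the inline asm pushes below the current RSP, loads the return RIP with a RIP-relative LEA (the kernel uses "pushq $1f") so the demo links as a PIE, and omits the kernel's __sp/_ASM_SP stack-pointer operand.

/*
 * Hypothetical demo, not kernel code.  Build with something like:
 *   gcc -O2 -mno-red-zone -o iret-to-self iret-to-self.c
 * (-mno-red-zone because the inline asm pushes below the current RSP).
 */
#include <stdio.h>

static void iret_to_self(void)
{
	unsigned long tmp;

	asm volatile (
		"mov %%ss, %0\n\t"		/* push SS ... */
		"pushq %q0\n\t"
		"pushq %%rsp\n\t"		/* ... the pre-push RSP ... */
		"addq $8, (%%rsp)\n\t"		/* (fix up for the push itself) */
		"pushfq\n\t"			/* ... RFLAGS ... */
		"mov %%cs, %0\n\t"		/* ... CS ... */
		"pushq %q0\n\t"
		"leaq 1f(%%rip), %q0\n\t"	/* ... and the return RIP */
		"pushq %q0\n\t"
		"iretq\n\t"			/* serializing "jump" to 1: */
		"1:"
		: "=&r" (tmp) : : "cc", "memory");
}

int main(void)
{
	iret_to_self();
	puts("came back from IRET-to-self");
	return 0;
}

The IRETQ consumes the five-word frame, discards the stale prefetched instruction stream, and resumes at the 1: label with the original RSP restored, which is the serializing behavior sync_core() relies on.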