From: Greg Kroah-Hartman
Date: Mon, 1 Apr 2019 16:59:51 +0000 (+0200)
Subject: drop x86-asm-rewrite-sync_core-to-use-iret-to-self.patch from 4.4 and 4.9.y
X-Git-Tag: v3.18.138~1
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=42ea5e2a9a9f9f65e236153887e0bedaf20e53b4;p=thirdparty%2Fkernel%2Fstable-queue.git

drop x86-asm-rewrite-sync_core-to-use-iret-to-self.patch from 4.4 and 4.9.y
---

diff --git a/queue-4.4/series b/queue-4.4/series
index 10d88dfe0c5..8d55cb065fb 100644
--- a/queue-4.4/series
+++ b/queue-4.4/series
@@ -129,4 +129,3 @@ revert-usb-core-only-clean-up-what-we-allocated.patch
 arm64-support-keyctl-system-call-in-32-bit-mode.patch
 coresight-removing-bind-unbind-options-from-sysfs.patch
 stm-class-hide-stm-specific-options-if-stm-is-disabl.patch
-x86-asm-rewrite-sync_core-to-use-iret-to-self.patch
diff --git a/queue-4.4/x86-asm-rewrite-sync_core-to-use-iret-to-self.patch b/queue-4.4/x86-asm-rewrite-sync_core-to-use-iret-to-self.patch
deleted file mode 100644
index 0d221c7b4d8..00000000000
--- a/queue-4.4/x86-asm-rewrite-sync_core-to-use-iret-to-self.patch
+++ /dev/null
@@ -1,134 +0,0 @@
-From c198b121b1a1d7a7171770c634cd49191bac4477 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski
-Date: Fri, 9 Dec 2016 10:24:08 -0800
-Subject: x86/asm: Rewrite sync_core() to use IRET-to-self
-
-From: Andy Lutomirski
-
-commit c198b121b1a1d7a7171770c634cd49191bac4477 upstream.
-
-Aside from being excessively slow, CPUID is problematic: Linux runs
-on a handful of CPUs that don't have CPUID. Use IRET-to-self
-instead. IRET-to-self works everywhere, so it makes testing easy.
-
-For reference, On my laptop, IRET-to-self is ~110ns,
-CPUID(eax=1, ecx=0) is ~83ns on native and very very slow under KVM,
-and MOV-to-CR2 is ~42ns.
-
-While we're at it: sync_core() serves a very specific purpose.
-Document it.
-
-Signed-off-by: Andy Lutomirski
-Cc: Juergen Gross
-Cc: One Thousand Gnomes
-Cc: Peter Zijlstra
-Cc: Brian Gerst
-Cc: Matthew Whitehead
-Cc: Borislav Petkov
-Cc: Henrique de Moraes Holschuh
-Cc: Andrew Cooper
-Cc: Boris Ostrovsky
-Cc: xen-devel
-Link: http://lkml.kernel.org/r/5c79f0225f68bc8c40335612bf624511abb78941.1481307769.git.luto@kernel.org
-Signed-off-by: Thomas Gleixner
-Cc: Evalds Iodzevics
-Signed-off-by: Greg Kroah-Hartman
-
----
- arch/x86/include/asm/processor.h | 80 ++++++++++++++++++++++++++++-----------
- 1 file changed, 58 insertions(+), 22 deletions(-)
-
---- a/arch/x86/include/asm/processor.h
-+++ b/arch/x86/include/asm/processor.h
-@@ -575,33 +575,69 @@ static __always_inline void cpu_relax(vo
-
- #define cpu_relax_lowlatency() cpu_relax()
-
--/* Stop speculative execution and prefetching of modified code. */
-+/*
-+ * This function forces the icache and prefetched instruction stream to
-+ * catch up with reality in two very specific cases:
-+ *
-+ * a) Text was modified using one virtual address and is about to be executed
-+ *    from the same physical page at a different virtual address.
-+ *
-+ * b) Text was modified on a different CPU, may subsequently be
-+ *    executed on this CPU, and you want to make sure the new version
-+ *    gets executed. This generally means you're calling this in a IPI.
-+ *
-+ * If you're calling this for a different reason, you're probably doing
-+ * it wrong.
-+ */
- static inline void sync_core(void)
- {
--        int tmp;
--
--#ifdef CONFIG_X86_32
-         /*
--         * Do a CPUID if available, otherwise do a jump. The jump
--         * can conveniently enough be the jump around CPUID.
-+         * There are quite a few ways to do this. IRET-to-self is nice
-+         * because it works on every CPU, at any CPL (so it's compatible
-+         * with paravirtualization), and it never exits to a hypervisor.
-+         * The only down sides are that it's a bit slow (it seems to be
-+         * a bit more than 2x slower than the fastest options) and that
-+         * it unmasks NMIs. The "push %cs" is needed because, in
-+         * paravirtual environments, __KERNEL_CS may not be a valid CS
-+         * value when we do IRET directly.
-+         *
-+         * In case NMI unmasking or performance ever becomes a problem,
-+         * the next best option appears to be MOV-to-CR2 and an
-+         * unconditional jump. That sequence also works on all CPUs,
-+         * but it will fault at CPL3 (i.e. Xen PV and lguest).
-+         *
-+         * CPUID is the conventional way, but it's nasty: it doesn't
-+         * exist on some 486-like CPUs, and it usually exits to a
-+         * hypervisor.
-+         *
-+         * Like all of Linux's memory ordering operations, this is a
-+         * compiler barrier as well.
-          */
--        asm volatile("cmpl %2,%1\n\t"
--                     "jl 1f\n\t"
--                     "cpuid\n"
--                     "1:"
--                     : "=a" (tmp)
--                     : "rm" (boot_cpu_data.cpuid_level), "ri" (0), "0" (1)
--                     : "ebx", "ecx", "edx", "memory");
-+        register void *__sp asm(_ASM_SP);
-+
-+#ifdef CONFIG_X86_32
-+        asm volatile (
-+                "pushfl\n\t"
-+                "pushl %%cs\n\t"
-+                "pushl $1f\n\t"
-+                "iret\n\t"
-+                "1:"
-+                : "+r" (__sp) : : "memory");
- #else
--        /*
--         * CPUID is a barrier to speculative execution.
--         * Prefetched instructions are automatically
--         * invalidated when modified.
--         */
--        asm volatile("cpuid"
--                     : "=a" (tmp)
--                     : "0" (1)
--                     : "ebx", "ecx", "edx", "memory");
-+        unsigned int tmp;
-+
-+        asm volatile (
-+                "mov %%ss, %0\n\t"
-+                "pushq %q0\n\t"
-+                "pushq %%rsp\n\t"
-+                "addq $8, (%%rsp)\n\t"
-+                "pushfq\n\t"
-+                "mov %%cs, %0\n\t"
-+                "pushq %q0\n\t"
-+                "pushq $1f\n\t"
-+                "iretq\n\t"
-+                "1:"
-+                : "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
- #endif
- }
-
diff --git a/queue-4.9/series b/queue-4.9/series
index 8e6bffcabe6..9a6d5d8626c 100644
--- a/queue-4.9/series
+++ b/queue-4.9/series
@@ -54,4 +54,3 @@ usb-gadget-f_hid-fix-deadlock-in-f_hidg_write.patch
 xhci-fix-port-resume-done-detection-for-ss-ports-with-lpm-enabled.patch
 revert-usb-core-only-clean-up-what-we-allocated.patch
 arm64-support-keyctl-system-call-in-32-bit-mode.patch
-x86-asm-rewrite-sync_core-to-use-iret-to-self.patch
diff --git a/queue-4.9/x86-asm-rewrite-sync_core-to-use-iret-to-self.patch b/queue-4.9/x86-asm-rewrite-sync_core-to-use-iret-to-self.patch
deleted file mode 100644
index 3127d4bc679..00000000000
--- a/queue-4.9/x86-asm-rewrite-sync_core-to-use-iret-to-self.patch
+++ /dev/null
@@ -1,134 +0,0 @@
-From c198b121b1a1d7a7171770c634cd49191bac4477 Mon Sep 17 00:00:00 2001
-From: Andy Lutomirski
-Date: Fri, 9 Dec 2016 10:24:08 -0800
-Subject: x86/asm: Rewrite sync_core() to use IRET-to-self
-
-From: Andy Lutomirski
-
-commit c198b121b1a1d7a7171770c634cd49191bac4477 upstream.
-
-Aside from being excessively slow, CPUID is problematic: Linux runs
-on a handful of CPUs that don't have CPUID. Use IRET-to-self
-instead. IRET-to-self works everywhere, so it makes testing easy.
-
-For reference, On my laptop, IRET-to-self is ~110ns,
-CPUID(eax=1, ecx=0) is ~83ns on native and very very slow under KVM,
-and MOV-to-CR2 is ~42ns.
-
-While we're at it: sync_core() serves a very specific purpose.
-Document it.
-
-Signed-off-by: Andy Lutomirski
-Cc: Juergen Gross
-Cc: One Thousand Gnomes
-Cc: Peter Zijlstra
-Cc: Brian Gerst
-Cc: Matthew Whitehead
-Cc: Borislav Petkov
-Cc: Henrique de Moraes Holschuh
-Cc: Andrew Cooper
-Cc: Boris Ostrovsky
-Cc: xen-devel
-Link: http://lkml.kernel.org/r/5c79f0225f68bc8c40335612bf624511abb78941.1481307769.git.luto@kernel.org
-Signed-off-by: Thomas Gleixner
-Cc: Evalds Iodzevics
-Signed-off-by: Greg Kroah-Hartman
-
----
- arch/x86/include/asm/processor.h | 80 ++++++++++++++++++++++++++++-----------
- 1 file changed, 58 insertions(+), 22 deletions(-)
-
---- a/arch/x86/include/asm/processor.h
-+++ b/arch/x86/include/asm/processor.h
-@@ -596,33 +596,69 @@ static __always_inline void cpu_relax(vo
-
- #define cpu_relax_lowlatency() cpu_relax()
-
--/* Stop speculative execution and prefetching of modified code. */
-+/*
-+ * This function forces the icache and prefetched instruction stream to
-+ * catch up with reality in two very specific cases:
-+ *
-+ * a) Text was modified using one virtual address and is about to be executed
-+ *    from the same physical page at a different virtual address.
-+ *
-+ * b) Text was modified on a different CPU, may subsequently be
-+ *    executed on this CPU, and you want to make sure the new version
-+ *    gets executed. This generally means you're calling this in a IPI.
-+ *
-+ * If you're calling this for a different reason, you're probably doing
-+ * it wrong.
-+ */
- static inline void sync_core(void)
- {
--        int tmp;
--
--#ifdef CONFIG_X86_32
-         /*
--         * Do a CPUID if available, otherwise do a jump. The jump
--         * can conveniently enough be the jump around CPUID.
-+         * There are quite a few ways to do this. IRET-to-self is nice
-+         * because it works on every CPU, at any CPL (so it's compatible
-+         * with paravirtualization), and it never exits to a hypervisor.
-+         * The only down sides are that it's a bit slow (it seems to be
-+         * a bit more than 2x slower than the fastest options) and that
-+         * it unmasks NMIs. The "push %cs" is needed because, in
-+         * paravirtual environments, __KERNEL_CS may not be a valid CS
-+         * value when we do IRET directly.
-+         *
-+         * In case NMI unmasking or performance ever becomes a problem,
-+         * the next best option appears to be MOV-to-CR2 and an
-+         * unconditional jump. That sequence also works on all CPUs,
-+         * but it will fault at CPL3 (i.e. Xen PV and lguest).
-+         *
-+         * CPUID is the conventional way, but it's nasty: it doesn't
-+         * exist on some 486-like CPUs, and it usually exits to a
-+         * hypervisor.
-+         *
-+         * Like all of Linux's memory ordering operations, this is a
-+         * compiler barrier as well.
-          */
--        asm volatile("cmpl %2,%1\n\t"
--                     "jl 1f\n\t"
--                     "cpuid\n"
--                     "1:"
--                     : "=a" (tmp)
--                     : "rm" (boot_cpu_data.cpuid_level), "ri" (0), "0" (1)
--                     : "ebx", "ecx", "edx", "memory");
-+        register void *__sp asm(_ASM_SP);
-+
-+#ifdef CONFIG_X86_32
-+        asm volatile (
-+                "pushfl\n\t"
-+                "pushl %%cs\n\t"
-+                "pushl $1f\n\t"
-+                "iret\n\t"
-+                "1:"
-+                : "+r" (__sp) : : "memory");
- #else
--        /*
--         * CPUID is a barrier to speculative execution.
--         * Prefetched instructions are automatically
--         * invalidated when modified.
--         */
--        asm volatile("cpuid"
--                     : "=a" (tmp)
--                     : "0" (1)
--                     : "ebx", "ecx", "edx", "memory");
-+        unsigned int tmp;
-+
-+        asm volatile (
-+                "mov %%ss, %0\n\t"
-+                "pushq %q0\n\t"
-+                "pushq %%rsp\n\t"
-+                "addq $8, (%%rsp)\n\t"
-+                "pushfq\n\t"
-+                "mov %%cs, %0\n\t"
-+                "pushq %q0\n\t"
-+                "pushq $1f\n\t"
-+                "iretq\n\t"
-+                "1:"
-+                : "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
- #endif
- }
-
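For readers studying the dropped patch itself: because IRET works at any CPL, the IRET-to-self sequence can be exercised from plain user space. Below is a minimal sketch, not part of the patch; the file name iret_to_self.c and the function name are made up for illustration. It builds the same five-word IRETQ frame (SS, RSP, RFLAGS, CS, RIP) that the 64-bit sync_core() above pushes, then executes IRETQ, which lands on the local label with the instruction stream serialized. It assumes x86-64 Linux and GCC, and it uses a RIP-relative LEA for the return address where kernel code can simply do "pushq $1f".

/*
 * iret_to_self.c - user-space sketch of the IRET-to-self idea.
 * Hypothetical demo, not kernel code; assumes x86-64 Linux + GCC.
 * Build: gcc -O2 -mno-red-zone -o iret_to_self iret_to_self.c
 * (-mno-red-zone because the inline asm pushes below the stack
 *  pointer, which would otherwise clobber the compiler's red zone.)
 */
#include <stdio.h>

static void iret_to_self(void)
{
        unsigned long tmp;

        asm volatile (
                /* Build the 5-word IRETQ frame: SS, RSP, RFLAGS, CS, RIP. */
                "mov %%ss, %0\n\t"
                "pushq %0\n\t"           /* SS */
                "pushq %%rsp\n\t"        /* RSP as of this push... */
                "addq $8, (%%rsp)\n\t"   /* ...fixed up to the pre-frame value */
                "pushfq\n\t"             /* RFLAGS */
                "mov %%cs, %0\n\t"
                "pushq %0\n\t"           /* CS */
                "leaq 1f(%%rip), %0\n\t" /* PIE-safe; the kernel uses pushq $1f */
                "pushq %0\n\t"           /* RIP */
                "iretq\n\t"              /* serializing jump to the label below */
                "1:"
                : "=&r" (tmp) : : "cc", "memory");
}

int main(void)
{
        iret_to_self();              /* a malformed frame would fault here */
        puts("returned via IRETQ");
        return 0;
}

The 32-bit path in the patch is shorter (pushfl; pushl %cs; pushl $1f; iret) because a same-privilege 32-bit IRET pops only EIP, CS and EFLAGS, never SS:ESP; that is the only reason the 64-bit path needs the "pushq %%rsp" plus "addq $8, (%%rsp)" fixup seen above.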