4.11-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Mon, 10 Jul 2017 14:44:57 +0000 (16:44 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Mon, 10 Jul 2017 14:44:57 +0000 (16:44 +0200)
added patches:
x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch

queue-4.11/series
queue-4.11/x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch [new file with mode: 0644]

diff --git a/queue-4.11/series b/queue-4.11/series
index 01baa85132646c8cff36179ac38b049e21d67f8b..4aac36e6c08db22240bd5abe8d1d98206eab15e1 100644
--- a/queue-4.11/series
+++ b/queue-4.11/series
@@ -30,3 +30,4 @@ gfs2-fix-glock-rhashtable-rcu-bug.patch
 add-shutdown-to-struct-class.patch
 tpm-issue-a-tpm2_shutdown-for-tpm2-devices.patch
 tpm-fix-a-kernel-memory-leak-in-tpm-sysfs.c.patch
+x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch
diff --git a/queue-4.11/x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch b/queue-4.11/x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch
new file mode 100644
index 0000000..d23c7d9
--- /dev/null
+++ b/queue-4.11/x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch
@@ -0,0 +1,83 @@
+From 236222d39347e0e486010f10c1493e83dbbdfba8 Mon Sep 17 00:00:00 2001
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Thu, 29 Jun 2017 15:55:58 +0200
+Subject: x86/uaccess: Optimize copy_user_enhanced_fast_string() for short strings
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit 236222d39347e0e486010f10c1493e83dbbdfba8 upstream.
+
+According to the Intel datasheet, the REP MOVSB instruction
+incurs a fairly heavy setup cost (50 ticks), which hurts
+short string copy operations.
+
+This change avoids that cost by branching to the explicit
+copy loop already present in the unrolled variant for
+strings shorter than 64 bytes.
+
+The 64-byte cutoff is arbitrary from the code-logic point
+of view: it was selected based on measurements, as the
+largest value that still ensures a measurable gain.
+
+Micro-benchmarks of the __copy_from_user() function with
+lengths in the [0-63] range show this performance gain
+(the shorter the string, the larger the gain):
+
+ - in the [55%-4%] range on Intel Xeon(R) CPU E5-2690 v4
+ - in the [72%-9%] range on Intel Core i7-4810MQ
+
+Other tested CPUs - namely Intel Atom S1260 and AMD Opteron
+8216 - show no difference, because they do not expose the
+ERMS feature bit.
+
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/4533a1d101fd460f80e21329a34928fad521c1d4.1498744345.git.pabeni@redhat.com
+[ Clarified the changelog. ]
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
+
+diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
+index c5959576c315..020f75cc8cf6 100644
+--- a/arch/x86/lib/copy_user_64.S
++++ b/arch/x86/lib/copy_user_64.S
+@@ -37,7 +37,7 @@ ENTRY(copy_user_generic_unrolled)
+       movl %edx,%ecx
+       andl $63,%edx
+       shrl $6,%ecx
+-      jz 17f
++      jz .L_copy_short_string
+ 1:    movq (%rsi),%r8
+ 2:    movq 1*8(%rsi),%r9
+ 3:    movq 2*8(%rsi),%r10
+@@ -58,7 +58,8 @@ ENTRY(copy_user_generic_unrolled)
+       leaq 64(%rdi),%rdi
+       decl %ecx
+       jnz 1b
+-17:   movl %edx,%ecx
++.L_copy_short_string:
++      movl %edx,%ecx
+       andl $7,%edx
+       shrl $3,%ecx
+       jz 20f
+@@ -174,6 +175,8 @@ EXPORT_SYMBOL(copy_user_generic_string)
+  */
+ ENTRY(copy_user_enhanced_fast_string)
+       ASM_STAC
++      cmpl $64,%edx
++      jb .L_copy_short_string /* less than 64 bytes, avoid the costly 'rep' */
+       movl %edx,%ecx
+ 1:    rep
+       movsb
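For readers outside the kernel tree, here is a minimal user-space C sketch of the dispatch heuristic the changelog describes: an explicit copy loop below a 64-byte cutoff, REP MOVSB at or above it. This is an illustrative analogue under stated assumptions, not the kernel routine itself; the function names and the SHORT_COPY_CUTOFF constant are hypothetical, and the exception/fixup handling of the real copy_user_enhanced_fast_string() is omitted.

/*
 * Hypothetical user-space analogue of the patch's heuristic.
 * Requires GCC/Clang inline asm on x86-64.
 */
#include <stddef.h>

#define SHORT_COPY_CUTOFF 64    /* mirrors the patch's measured threshold */

static void copy_rep_movsb(void *dst, const void *src, size_t len)
{
        /*
         * REP MOVSB is fast for large copies on ERMS CPUs, but pays a
         * fixed setup cost that dominates at short lengths.
         */
        asm volatile("rep movsb"
                     : "+D" (dst), "+S" (src), "+c" (len)
                     : : "memory");
}

static void copy_short(void *dst, const void *src, size_t len)
{
        /* Explicit byte loop: no setup cost, so it wins below the cutoff. */
        unsigned char *d = dst;
        const unsigned char *s = src;

        while (len--)
                *d++ = *s++;
}

static void copy_with_cutoff(void *dst, const void *src, size_t len)
{
        if (len < SHORT_COPY_CUTOFF)
                copy_short(dst, src, len);     /* short: skip 'rep' setup */
        else
                copy_rep_movsb(dst, src, len); /* long: ERMS fast path */
}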
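And a rough timing sketch, assuming the helpers from the sketch above, that samples the [0-63] length range cited in the changelog. The methodology (wall clock over repeated calls through a function pointer) is far cruder than the kernel micro-benchmark the changelog refers to, so treat any output as indicative only; compile with low optimization (e.g. -O1) so the copy loops are not elided.

#include <stdio.h>
#include <time.h>

static double time_copies(void (*copy)(void *, const void *, size_t),
                          size_t len, long iters)
{
        static unsigned char src[64], dst[64];
        struct timespec t0, t1;

        clock_gettime(CLOCK_MONOTONIC, &t0);
        for (long i = 0; i < iters; i++)
                copy(dst, src, len);
        clock_gettime(CLOCK_MONOTONIC, &t1);

        /* Total elapsed nanoseconds for 'iters' copies of 'len' bytes. */
        return (t1.tv_sec - t0.tv_sec) * 1e9 + (t1.tv_nsec - t0.tv_nsec);
}

int main(void)
{
        const long iters = 10 * 1000 * 1000;

        /* Sample lengths across the [0-63] range from the changelog. */
        for (size_t len = 0; len < 64; len += 8) {
                double ns_rep = time_copies(copy_rep_movsb, len, iters);
                double ns_cut = time_copies(copy_with_cutoff, len, iters);

                printf("len %2zu: rep movsb %.0f ns, cutoff path %.0f ns\n",
                       len, ns_rep, ns_cut);
        }
        return 0;
}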