From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 10 Jul 2017 14:44:57 +0000 (+0200)
Subject: 4.11-stable patches
X-Git-Tag: v4.9.37~13
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=17ac51a672047e4ea10d1ef67c3bd154a6bc4df9;p=thirdparty%2Fkernel%2Fstable-queue.git

4.11-stable patches

added patches:
	x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch
---

diff --git a/queue-4.11/series b/queue-4.11/series
index 01baa851326..4aac36e6c08 100644
--- a/queue-4.11/series
+++ b/queue-4.11/series
@@ -30,3 +30,4 @@ gfs2-fix-glock-rhashtable-rcu-bug.patch
 add-shutdown-to-struct-class.patch
 tpm-issue-a-tpm2_shutdown-for-tpm2-devices.patch
 tpm-fix-a-kernel-memory-leak-in-tpm-sysfs.c.patch
+x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch
diff --git a/queue-4.11/x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch b/queue-4.11/x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch
new file mode 100644
index 00000000000..d23c7d9edb5
--- /dev/null
+++ b/queue-4.11/x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch
@@ -0,0 +1,83 @@
+From 236222d39347e0e486010f10c1493e83dbbdfba8 Mon Sep 17 00:00:00 2001
+From: Paolo Abeni <pabeni@redhat.com>
+Date: Thu, 29 Jun 2017 15:55:58 +0200
+Subject: x86/uaccess: Optimize copy_user_enhanced_fast_string() for short strings
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+commit 236222d39347e0e486010f10c1493e83dbbdfba8 upstream.
+
+According to the Intel datasheet, the REP MOVSB instruction
+incurs a pretty heavy setup cost (50 ticks), which hurts
+short string copy operations.
+
+This change tries to avoid this cost by calling the explicit
+loop available in the unrolled code for strings shorter
+than 64 bytes.
+
+The 64 bytes cutoff value is arbitrary from the code logic
+point of view - it has been selected based on measurements,
+as the largest value that still ensures a measurable gain.
+
+Micro benchmarks of the __copy_from_user() function with
+lengths in the [0-63] range show this performance gain
+(the shorter the string, the larger the gain):
+
+ - in the [55%-4%] range on Intel Xeon(R) CPU E5-2690 v4
+ - in the [72%-9%] range on Intel Core i7-4810MQ
+
+Other tested CPUs - namely Intel Atom S1260 and AMD Opteron
+8216 - show no difference, because they do not expose the
+ERMS feature bit.
+
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brian Gerst <brgerst@gmail.com>
+Cc: Denys Vlasenko <dvlasenk@redhat.com>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
+Cc: Josh Poimboeuf <jpoimboe@redhat.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Link: http://lkml.kernel.org/r/4533a1d101fd460f80e21329a34928fad521c1d4.1498744345.git.pabeni@redhat.com
+[ Clarified the changelog. ]
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
+
+diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
+index c5959576c315..020f75cc8cf6 100644
+--- a/arch/x86/lib/copy_user_64.S
++++ b/arch/x86/lib/copy_user_64.S
+@@ -37,7 +37,7 @@ ENTRY(copy_user_generic_unrolled)
+ 	movl %edx,%ecx
+ 	andl $63,%edx
+ 	shrl $6,%ecx
+-	jz 17f
++	jz .L_copy_short_string
+ 1:	movq (%rsi),%r8
+ 2:	movq 1*8(%rsi),%r9
+ 3:	movq 2*8(%rsi),%r10
+@@ -58,7 +58,8 @@ ENTRY(copy_user_generic_unrolled)
+ 	leaq 64(%rdi),%rdi
+ 	decl %ecx
+ 	jnz 1b
+-17:	movl %edx,%ecx
++.L_copy_short_string:
++	movl %edx,%ecx
+ 	andl $7,%edx
+ 	shrl $3,%ecx
+ 	jz 20f
+@@ -174,6 +175,8 @@ EXPORT_SYMBOL(copy_user_generic_string)
+  */
+ ENTRY(copy_user_enhanced_fast_string)
+ 	ASM_STAC
++	cmpl $64,%edx
++	jb .L_copy_short_string	/* less than 64 bytes, avoid the costly 'rep' */
+ 	movl %edx,%ecx
+ 1:	rep
+ 	movsb