From: Greg Kroah-Hartman Date: Mon, 10 Jul 2017 14:44:57 +0000 (+0200) Subject: 4.11-stable patches X-Git-Tag: v4.9.37~13 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=17ac51a672047e4ea10d1ef67c3bd154a6bc4df9;p=thirdparty%2Fkernel%2Fstable-queue.git 4.11-stable patches added patches: x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch --- diff --git a/queue-4.11/series b/queue-4.11/series index 01baa851326..4aac36e6c08 100644 --- a/queue-4.11/series +++ b/queue-4.11/series @@ -30,3 +30,4 @@ gfs2-fix-glock-rhashtable-rcu-bug.patch add-shutdown-to-struct-class.patch tpm-issue-a-tpm2_shutdown-for-tpm2-devices.patch tpm-fix-a-kernel-memory-leak-in-tpm-sysfs.c.patch +x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch diff --git a/queue-4.11/x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch b/queue-4.11/x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch new file mode 100644 index 00000000000..d23c7d9edb5 --- /dev/null +++ b/queue-4.11/x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch @@ -0,0 +1,83 @@ +From 236222d39347e0e486010f10c1493e83dbbdfba8 Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Thu, 29 Jun 2017 15:55:58 +0200 +Subject: x86/uaccess: Optimize copy_user_enhanced_fast_string() for short strings + +From: Paolo Abeni + +commit 236222d39347e0e486010f10c1493e83dbbdfba8 upstream. + +According to the Intel datasheet, the REP MOVSB instruction +exposes a pretty heavy setup cost (50 ticks), which hurts +short string copy operations. + +This change tries to avoid this cost by calling the explicit +loop available in the unrolled code for strings shorter +than 64 bytes. + +The 64 bytes cutoff value is arbitrary from the code logic +point of view - it has been selected based on measurements, +as the largest value that still ensures a measurable gain. 
+ +Micro benchmarks of the __copy_from_user() function with +lengths in the [0-63] range show this performance gain +(the shorter the string, the larger the gain): + + - in the [55%-4%] range on Intel Xeon(R) CPU E5-2690 v4 + - in the [72%-9%] range on Intel Core i7-4810MQ + +Other tested CPUs - namely Intel Atom S1260 and AMD Opteron +8216 - show no difference, because they do not expose the +ERMS feature bit. + +Signed-off-by: Paolo Abeni +Acked-by: Linus Torvalds +Cc: Alan Cox +Cc: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Denys Vlasenko +Cc: H. Peter Anvin +Cc: Hannes Frederic Sowa +Cc: Josh Poimboeuf +Cc: Kees Cook +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/4533a1d101fd460f80e21329a34928fad521c1d4.1498744345.git.pabeni@redhat.com +[ Clarified the changelog. ] +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Mel Gorman + +diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S +index c5959576c315..020f75cc8cf6 100644 +--- a/arch/x86/lib/copy_user_64.S ++++ b/arch/x86/lib/copy_user_64.S +@@ -37,7 +37,7 @@ ENTRY(copy_user_generic_unrolled) + movl %edx,%ecx + andl $63,%edx + shrl $6,%ecx +- jz 17f ++ jz .L_copy_short_string + 1: movq (%rsi),%r8 + 2: movq 1*8(%rsi),%r9 + 3: movq 2*8(%rsi),%r10 +@@ -58,7 +58,8 @@ ENTRY(copy_user_generic_unrolled) + leaq 64(%rdi),%rdi + decl %ecx + jnz 1b +-17: movl %edx,%ecx ++.L_copy_short_string: ++ movl %edx,%ecx + andl $7,%edx + shrl $3,%ecx + jz 20f +@@ -174,6 +175,8 @@ EXPORT_SYMBOL(copy_user_generic_string) + */ + ENTRY(copy_user_enhanced_fast_string) + ASM_STAC ++ cmpl $64,%edx ++ jb .L_copy_short_string /* less then 64 bytes, avoid the costly 'rep' */ + movl %edx,%ecx + 1: rep + movsb