From: Greg Kroah-Hartman Date: Mon, 10 Jul 2017 14:45:48 +0000 (+0200) Subject: 4.9-stable patches X-Git-Tag: v4.9.37~10 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7874027efe159fdde03bf9e8e1d9f86f6ff84057;p=thirdparty%2Fkernel%2Fstable-queue.git 4.9-stable patches added patches: x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch --- diff --git a/queue-4.9/series b/queue-4.9/series index d9546c36d2a..f77de98ffe6 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -33,3 +33,4 @@ xhci-limit-usb2-port-wake-support-for-amd-promontory-hosts.patch gfs2-fix-glock-rhashtable-rcu-bug.patch tpm-fix-a-kernel-memory-leak-in-tpm-sysfs.c.patch x86-tools-fix-gcc-7-warning-in-relocs.c.patch +x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch diff --git a/queue-4.9/x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch b/queue-4.9/x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch new file mode 100644 index 00000000000..b6386f01374 --- /dev/null +++ b/queue-4.9/x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch @@ -0,0 +1,85 @@ +From 236222d39347e0e486010f10c1493e83dbbdfba8 Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Thu, 29 Jun 2017 15:55:58 +0200 +Subject: x86/uaccess: Optimize copy_user_enhanced_fast_string() for short strings + +From: Paolo Abeni + +commit 236222d39347e0e486010f10c1493e83dbbdfba8 upstream. + +According to the Intel datasheet, the REP MOVSB instruction +exposes a pretty heavy setup cost (50 ticks), which hurts +short string copy operations. + +This change tries to avoid this cost by calling the explicit +loop available in the unrolled code for strings shorter +than 64 bytes. + +The 64 bytes cutoff value is arbitrary from the code logic +point of view - it has been selected based on measurements, +as the largest value that still ensures a measurable gain. 
+ +Micro benchmarks of the __copy_from_user() function with +lengths in the [0-63] range show this performance gain +(the shorter the string, the larger the gain): + + - in the [55%-4%] range on Intel Xeon(R) CPU E5-2690 v4 + - in the [72%-9%] range on Intel Core i7-4810MQ + +Other tested CPUs - namely Intel Atom S1260 and AMD Opteron +8216 - show no difference, because they do not expose the +ERMS feature bit. + +Signed-off-by: Paolo Abeni +Acked-by: Linus Torvalds +Cc: Alan Cox +Cc: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Denys Vlasenko +Cc: H. Peter Anvin +Cc: Hannes Frederic Sowa +Cc: Josh Poimboeuf +Cc: Kees Cook +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/4533a1d101fd460f80e21329a34928fad521c1d4.1498744345.git.pabeni@redhat.com +[ Clarified the changelog. ] +Signed-off-by: Ingo Molnar +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Mel Gorman + +--- + arch/x86/lib/copy_user_64.S | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/arch/x86/lib/copy_user_64.S ++++ b/arch/x86/lib/copy_user_64.S +@@ -84,7 +84,7 @@ ENTRY(copy_user_generic_unrolled) + movl %edx,%ecx + andl $63,%edx + shrl $6,%ecx +- jz 17f ++ jz .L_copy_short_string + 1: movq (%rsi),%r8 + 2: movq 1*8(%rsi),%r9 + 3: movq 2*8(%rsi),%r10 +@@ -105,7 +105,8 @@ ENTRY(copy_user_generic_unrolled) + leaq 64(%rdi),%rdi + decl %ecx + jnz 1b +-17: movl %edx,%ecx ++.L_copy_short_string: ++ movl %edx,%ecx + andl $7,%edx + shrl $3,%ecx + jz 20f +@@ -221,6 +222,8 @@ EXPORT_SYMBOL(copy_user_generic_string) + */ + ENTRY(copy_user_enhanced_fast_string) + ASM_STAC ++ cmpl $64,%edx ++ jb .L_copy_short_string /* less than 64 bytes, avoid the costly 'rep' */ + movl %edx,%ecx + 1: rep + movsb