From 01affd32f7517b47a1e0873bafa2ecf20e9721fc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 10 Jul 2017 16:45:31 +0200 Subject: [PATCH] 4.4-stable patches added patches: x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch --- queue-4.4/series | 1 + ...hanced_fast_string-for-short-strings.patch | 85 +++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 queue-4.4/x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch diff --git a/queue-4.4/series b/queue-4.4/series index 88331e6818a..ee46c16471f 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -23,3 +23,4 @@ usb-serial-option-add-two-longcheer-device-ids.patch usb-serial-qcserial-new-sierra-wireless-em7305-device-id.patch gfs2-fix-glock-rhashtable-rcu-bug.patch x86-tools-fix-gcc-7-warning-in-relocs.c.patch +x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch diff --git a/queue-4.4/x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch b/queue-4.4/x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch new file mode 100644 index 00000000000..39f80ac5494 --- /dev/null +++ b/queue-4.4/x86-uaccess-optimize-copy_user_enhanced_fast_string-for-short-strings.patch @@ -0,0 +1,85 @@ +From 236222d39347e0e486010f10c1493e83dbbdfba8 Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Thu, 29 Jun 2017 15:55:58 +0200 +Subject: x86/uaccess: Optimize copy_user_enhanced_fast_string() for short strings + +From: Paolo Abeni + +commit 236222d39347e0e486010f10c1493e83dbbdfba8 upstream. + +According to the Intel datasheet, the REP MOVSB instruction +exposes a pretty heavy setup cost (50 ticks), which hurts +short string copy operations. + +This change tries to avoid this cost by calling the explicit +loop available in the unrolled code for strings shorter +than 64 bytes. + +The 64 bytes cutoff value is arbitrary from the code logic +point of view - it has been selected based on measurements, +as the largest value that still ensures a measurable gain. + +Micro benchmarks of the __copy_from_user() function with +lengths in the [0-63] range show this performance gain +(shorter the string, larger the gain): + + - in the [55%-4%] range on Intel Xeon(R) CPU E5-2690 v4 + - in the [72%-9%] range on Intel Core i7-4810MQ + +Other tested CPUs - namely Intel Atom S1260 and AMD Opteron +8216 - show no difference, because they do not expose the +ERMS feature bit. + +Signed-off-by: Paolo Abeni +Acked-by: Linus Torvalds +Cc: Alan Cox +Cc: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Denys Vlasenko +Cc: H. Peter Anvin +Cc: Hannes Frederic Sowa +Cc: Josh Poimboeuf +Cc: Kees Cook +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/4533a1d101fd460f80e21329a34928fad521c1d4.1498744345.git.pabeni@redhat.com +[ Clarified the changelog. 
+]
+Signed-off-by: Ingo Molnar
+Signed-off-by: Greg Kroah-Hartman
+
+---
+ arch/x86/lib/copy_user_64.S |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/lib/copy_user_64.S
++++ b/arch/x86/lib/copy_user_64.S
+@@ -80,7 +80,7 @@ ENTRY(copy_user_generic_unrolled)
+ 	movl %edx,%ecx
+ 	andl $63,%edx
+ 	shrl $6,%ecx
+-	jz 17f
++	jz .L_copy_short_string
+ 1:	movq (%rsi),%r8
+ 2:	movq 1*8(%rsi),%r9
+ 3:	movq 2*8(%rsi),%r10
+@@ -101,7 +101,8 @@ ENTRY(copy_user_generic_unrolled)
+ 	leaq 64(%rdi),%rdi
+ 	decl %ecx
+ 	jnz 1b
+-17:	movl %edx,%ecx
++.L_copy_short_string:
++	movl %edx,%ecx
+ 	andl $7,%edx
+ 	shrl $3,%ecx
+ 	jz 20f
+@@ -215,6 +216,8 @@ ENDPROC(copy_user_generic_string)
+  */
+ ENTRY(copy_user_enhanced_fast_string)
+ 	ASM_STAC
++	cmpl $64,%edx
++	jb .L_copy_short_string /* less than 64 bytes, avoid the costly 'rep' */
+ 	movl %edx,%ecx
+ 1:	rep
+ 	movsb
-- 
2.47.3
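
For readers who want the idea outside the assembly: a minimal user-space C sketch of the same dispatch follows. It is illustrative only - copy_model and ERMS_CUTOFF are hypothetical names, the kernel's real implementation is the assembly patched above, and whether memcpy() actually lowers to REP MOVSB depends on the CPU and the libc.

#include <stddef.h>
#include <string.h>

/*
 * Model of the patch's dispatch: below a measured cutoff, an explicit
 * byte loop avoids the fixed setup cost of REP MOVSB; at or above it,
 * that setup cost is amortized over the length of the copy.
 */
#define ERMS_CUTOFF 64	/* mirrors the cutoff measured in the patch */

static void *copy_model(void *dst, const void *src, size_t len)
{
	unsigned char *d = dst;
	const unsigned char *s = src;

	if (len < ERMS_CUTOFF) {
		/* Short copy: plain loop, no string-instruction setup. */
		while (len--)
			*d++ = *s++;
		return dst;
	}
	/* Long copy: on ERMS hardware memcpy() may use REP MOVSB. */
	return memcpy(dst, src, len);
}

int main(void)
{
	char src[128] = "short vs long copy demo", dst[128];

	copy_model(dst, src, 16);		/* byte-loop path */
	copy_model(dst, src, sizeof(src));	/* memcpy() path */
	return 0;
}

The cutoff is the interesting design point: as the changelog notes, it is not derived from first principles but measured, as the largest value that still shows a gain, and CPUs without the ERMS feature bit (the Atom and Opteron above) are unaffected either way.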