From: Greg Kroah-Hartman
Date: Thu, 13 Aug 2015 21:27:07 +0000 (-0700)
Subject: 3.10-stable patches
X-Git-Tag: v3.10.87~33
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=af87cea63e4d672c6dc4d9eca15d4902bb5f1b5a;p=thirdparty%2Fkernel%2Fstable-queue.git

3.10-stable patches

added patches:
	sparc64-fix-fpu-register-corruption-with-aes-crypto-offload.patch
	sparc64-fix-userspace-fpu-register-corruptions.patch
---

diff --git a/queue-3.10/series b/queue-3.10/series
index 1bd602ddfc0..828cbe74edf 100644
--- a/queue-3.10/series
+++ b/queue-3.10/series
@@ -12,3 +12,5 @@ md-use-kzalloc-when-bitmap-is-disabled.patch
 ipmi-fix-timeout-calculation-when-bmc-is-disconnected.patch
 mfd-sm501-dbg_regs-attribute-must-be-read-only.patch
 perf-x86-amd-rework-amd-pmu-init-code.patch
+sparc64-fix-fpu-register-corruption-with-aes-crypto-offload.patch
+sparc64-fix-userspace-fpu-register-corruptions.patch
diff --git a/queue-3.10/sparc64-fix-fpu-register-corruption-with-aes-crypto-offload.patch b/queue-3.10/sparc64-fix-fpu-register-corruption-with-aes-crypto-offload.patch
new file mode 100644
index 00000000000..19239dc84ad
--- /dev/null
+++ b/queue-3.10/sparc64-fix-fpu-register-corruption-with-aes-crypto-offload.patch
@@ -0,0 +1,107 @@
+From foo@baz Thu Aug 13 14:26:49 PDT 2015
+From: "David S. Miller"
+Date: Tue, 14 Oct 2014 19:37:58 -0700
+Subject: sparc64: Fix FPU register corruption with AES crypto offload.
+
+From: "David S. Miller"
+
+[ Upstream commit f4da3628dc7c32a59d1fb7116bb042e6f436d611 ]
+
+The AES loops in arch/sparc/crypto/aes_glue.c use a scheme where the
+key material is preloaded into the FPU registers, and then we loop
+over and over doing the crypt operation, reusing those pre-cooked key
+registers.
+
+There are intervening blkcipher*() calls between the crypt operation
+calls.  And those might perform memcpy() and thus also try to use the
+FPU.
+
+The sparc64 kernel FPU usage mechanism is designed to allow such
+recursive uses, but with a catch.
+
+There has to be a trap between the two FPU using threads of control.
+
+The mechanism works by, when the FPU is already in use by the kernel,
+allocating a slot for FPU saving at trap time.  Then if, within the
+trap handler, we try to use the FPU registers, the pre-trap FPU
+register state is saved into the slot.  Then at trap return time we
+notice this and restore the pre-trap FPU state.
+
+Over the long term there are various more involved ways we can make
+this work, but for a quick fix let's take advantage of the fact that
+the situation where this happens is very limited.
+
+All sparc64 chips that support the crypto instructions also are using
+the Niagara4 memcpy routine, and that routine only uses the FPU for
+large copies where we can't get the source aligned properly to a
+multiple of 8 bytes.
+
+We look to see if the FPU is already in use in this context, and if so
+we use the non-large copy path which only uses integer registers.
+
+Furthermore, we also limit this special logic to when we are doing
+a kernel copy, rather than a user copy.
+
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/sparc/include/asm/visasm.h | 8 ++++++++
+ arch/sparc/lib/NG4memcpy.S | 14 +++++++++++++-
+ 2 files changed, 21 insertions(+), 1 deletion(-)
+
+--- a/arch/sparc/include/asm/visasm.h
++++ b/arch/sparc/include/asm/visasm.h
+@@ -39,6 +39,14 @@
+ 297: wr %o5, FPRS_FEF, %fprs; \
+ 298:
+
++#define VISEntryHalfFast(fail_label) \
++ rd %fprs, %o5; \
++ andcc %o5, FPRS_FEF, %g0; \
++ be,pt %icc, 297f; \
++ nop; \
++ ba,a,pt %xcc, fail_label; \
++297: wr %o5, FPRS_FEF, %fprs;
++
+ #define VISExitHalf \
+ wr %o5, 0, %fprs;
+
+--- a/arch/sparc/lib/NG4memcpy.S
++++ b/arch/sparc/lib/NG4memcpy.S
+@@ -41,6 +41,10 @@
+ #endif
+ #endif
+
++#if !defined(EX_LD) && !defined(EX_ST)
++#define NON_USER_COPY
++#endif
++
+ #ifndef EX_LD
+ #define EX_LD(x) x
+ #endif
+@@ -197,9 +201,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len
+ mov EX_RETVAL(%o3), %o0
+
+ .Llarge_src_unaligned:
++#ifdef NON_USER_COPY
++ VISEntryHalfFast(.Lmedium_vis_entry_fail)
++#else
++ VISEntryHalf
++#endif
+ andn %o2, 0x3f, %o4
+ sub %o2, %o4, %o2
+- VISEntryHalf
+ alignaddr %o1, %g0, %g1
+ add %o1, %o4, %o1
+ EX_LD(LOAD(ldd, %g1 + 0x00, %f0))
+@@ -240,6 +248,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len
+ nop
+ ba,a,pt %icc, .Lmedium_unaligned
+
++#ifdef NON_USER_COPY
++.Lmedium_vis_entry_fail:
++ or %o0, %o1, %g2
++#endif
+ .Lmedium:
+ LOAD(prefetch, %o1 + 0x40, #n_reads_strong)
+ andcc %g2, 0x7, %g0
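
For context, the aes_glue.c pattern that the commit message above describes
looks roughly like the sketch below.  This is a simplified illustration, not
the verbatim 3.10 source: sparc64_load_encrypt_keys(), sparc64_ecb_encrypt()
and struct sparc64_aes_ctx are stand-in names for the FPU-using assembly
helpers and their context structure.  The point is that the key schedule is
parked in the FPU registers before the loop, while blkcipher_walk_done()
inside the loop may memcpy() data around, and on Niagara4 a large memcpy()
can itself grab the FPU.

/* Simplified sketch of the preload-then-loop pattern (names assumed). */
#include <crypto/aes.h>
#include <crypto/algapi.h>
#include <linux/kernel.h>

struct sparc64_aes_ctx {
	u64 key[AES_MAX_KEYLENGTH / sizeof(u64)];
};

/* Stand-ins for the FPU-using assembly routines. */
void sparc64_load_encrypt_keys(const u64 *key);
void sparc64_ecb_encrypt(const u64 *key, const u64 *in, u64 *out,
			 unsigned int len);

static int ecb_encrypt_sketch(struct blkcipher_desc *desc,
			      struct scatterlist *dst,
			      struct scatterlist *src,
			      unsigned int nbytes)
{
	struct sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	/* Key schedule now lives in the FPU registers for the whole loop. */
	sparc64_load_encrypt_keys(ctx->key);

	while ((nbytes = walk.nbytes) != 0) {
		unsigned int block_len = nbytes & ~(AES_BLOCK_SIZE - 1);

		if (block_len)
			sparc64_ecb_encrypt(ctx->key,
					    (const u64 *)walk.src.virt.addr,
					    (u64 *)walk.dst.virt.addr,
					    block_len);
		nbytes &= AES_BLOCK_SIZE - 1;

		/* May copy partial blocks around; a large copy can use the
		 * FPU again and clobber the preloaded key registers. */
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}
	return err;
}

The NON_USER_COPY / VISEntryHalfFast change above targets exactly that second
FPU user: when NG4memcpy.S is built as the plain kernel memcpy (neither EX_LD
nor EX_ST defined) and the FPU is already busy, it now falls back to the
integer-only medium-copy path instead of clobbering the preloaded keys.
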
diff --git a/queue-3.10/sparc64-fix-userspace-fpu-register-corruptions.patch b/queue-3.10/sparc64-fix-userspace-fpu-register-corruptions.patch
new file mode 100644
index 00000000000..707a83f9594
--- /dev/null
+++ b/queue-3.10/sparc64-fix-userspace-fpu-register-corruptions.patch
@@ -0,0 +1,196 @@
+From foo@baz Thu Aug 13 14:26:49 PDT 2015
+From: "David S. Miller"
+Date: Thu, 6 Aug 2015 19:13:25 -0700
+Subject: sparc64: Fix userspace FPU register corruptions.
+
+From: "David S. Miller"
+
+[ Upstream commit 44922150d87cef616fd183220d43d8fde4d41390 ]
+
+If we have a series of events from userspace, with %fprs=FPRS_FEF,
+like follows:
+
+ETRAP
+	ETRAP
+		VIS_ENTRY(fprs=0x4)
+		VIS_EXIT
+		RTRAP (kernel FPU restore with fpu_saved=0x4)
+	RTRAP
+
+We will not restore the user registers that were clobbered by the FPU
+using kernel code in the inner-most trap.
+
+Traps allocate FPU save slots in the thread struct, and FPU using
+sequences save the "dirty" FPU registers only.
+
+This works at the initial trap level because all of the registers
+get recorded into the top-level FPU save area, and we'll return
+to userspace with the FPU disabled so that any FPU use by the user
+will take an FPU disabled trap wherein we'll load the registers
+back up properly.
+
+But this is not how trap returns from kernel to kernel operate.
+
+The simplest fix for this bug is to always save all FPU register state
+for anything other than the top-most FPU save area.
+
+Getting rid of the optimized inner-slot FPU saving code ends up
+making VISEntryHalf degenerate into plain VISEntry.
+
+Longer term we need to do something smarter to reinstate the partial
+save optimizations.  Perhaps the fundamental error is having trap entry
+and exit allocate FPU save slots and restore register state.  Instead,
+the VISEntry et al. calls should be doing that work.
+
+This bug is about two decades old.
+
+Reported-by: James Y Knight
+Signed-off-by: David S. Miller
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/sparc/include/asm/visasm.h | 16 ++-------
+ arch/sparc/lib/NG4memcpy.S | 5 ++
+ arch/sparc/lib/VISsave.S | 67 +---------------------------------------
+ arch/sparc/lib/ksyms.c | 4 --
+ 4 files changed, 11 insertions(+), 81 deletions(-)
+
+--- a/arch/sparc/include/asm/visasm.h
++++ b/arch/sparc/include/asm/visasm.h
+@@ -28,16 +28,10 @@
+  * Must preserve %o5 between VISEntryHalf and VISExitHalf */
+
+ #define VISEntryHalf \
+- rd %fprs, %o5; \
+- andcc %o5, FPRS_FEF, %g0; \
+- be,pt %icc, 297f; \
+- sethi %hi(298f), %g7; \
+- sethi %hi(VISenterhalf), %g1; \
+- jmpl %g1 + %lo(VISenterhalf), %g0; \
+- or %g7, %lo(298f), %g7; \
+- clr %o5; \
+-297: wr %o5, FPRS_FEF, %fprs; \
+-298:
++ VISEntry
++
++#define VISExitHalf \
++ VISExit
++
+ #define VISEntryHalfFast(fail_label) \
+ rd %fprs, %o5; \
+ andcc %o5, FPRS_FEF, %g0; \
+ be,pt %icc, 297f; \
+ nop; \
+ ba,a,pt %xcc, fail_label; \
+ 297: wr %o5, FPRS_FEF, %fprs;
+
+-#define VISExitHalf \
++#define VISExitHalfFast \
+ wr %o5, 0, %fprs;
+
+ #ifndef __ASSEMBLY__
+--- a/arch/sparc/lib/NG4memcpy.S
++++ b/arch/sparc/lib/NG4memcpy.S
+@@ -240,8 +240,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len
+ add %o0, 0x40, %o0
+ bne,pt %icc, 1b
+ LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
++#ifdef NON_USER_COPY
++ VISExitHalfFast
++#else
+ VISExitHalf
+-
++#endif
+ brz,pn %o2, .Lexit
+ cmp %o2, 19
+ ble,pn %icc, .Lsmall_unaligned
+--- a/arch/sparc/lib/VISsave.S
++++ b/arch/sparc/lib/VISsave.S
+@@ -44,9 +44,8 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3
+
+ stx %g3, [%g6 + TI_GSR]
+ 2: add %g6, %g1, %g3
+- cmp %o5, FPRS_DU
+- be,pn %icc, 6f
+- sll %g1, 3, %g1
++ mov FPRS_DU | FPRS_DL | FPRS_FEF, %o5
++ sll %g1, 3, %g1
+ stb %o5, [%g3 + TI_FPSAVED]
+ rd %gsr, %g2
+ add %g6, %g1, %g3
+@@ -80,65 +79,3 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3
+ .align 32
+ 80: jmpl %g7 + %g0, %g0
+ nop
+-
+-6: ldub [%g3 + TI_FPSAVED], %o5
+- or %o5, FPRS_DU, %o5
+- add %g6, TI_FPREGS+0x80, %g2
+- stb %o5, [%g3 + TI_FPSAVED]
+-
+- sll %g1, 5, %g1
+- add %g6, TI_FPREGS+0xc0, %g3
+- wr %g0, FPRS_FEF, %fprs
+- membar #Sync
+- stda %f32, [%g2 + %g1] ASI_BLK_P
+- stda %f48, [%g3 + %g1] ASI_BLK_P
+- membar #Sync
+- ba,pt %xcc, 80f
+- nop
+-
+- .align 32
+-80: jmpl %g7 + %g0, %g0
+- nop
+-
+- .align 32
+-VISenterhalf:
+- ldub [%g6 + TI_FPDEPTH], %g1
+- brnz,a,pn %g1, 1f
+- cmp %g1, 1
+- stb %g0, [%g6 + TI_FPSAVED]
+- stx %fsr, [%g6 + TI_XFSR]
+- clr %o5
+- jmpl %g7 + %g0, %g0
+- wr %g0, FPRS_FEF, %fprs
+-
+-1: bne,pn %icc, 2f
+- srl %g1, 1, %g1
+- ba,pt %xcc, vis1
+- sub %g7, 8, %g7
+-2: addcc %g6, %g1, %g3
+- sll %g1, 3, %g1
+- andn %o5, FPRS_DU, %g2
+- stb %g2, [%g3 + TI_FPSAVED]
+-
+- rd %gsr, %g2
+- add %g6, %g1, %g3
+- stx %g2, [%g3 + TI_GSR]
+- add %g6, %g1, %g2
+- stx %fsr, [%g2 + TI_XFSR]
+- sll %g1, 5, %g1
+-3: andcc %o5, FPRS_DL, %g0
+- be,pn %icc, 4f
+- add %g6, TI_FPREGS, %g2
+-
+- add %g6, TI_FPREGS+0x40, %g3
+- membar #Sync
+- stda %f0, [%g2 + %g1] ASI_BLK_P
+- stda %f16, [%g3 + %g1] ASI_BLK_P
+- membar #Sync
+- ba,pt %xcc, 4f
+- nop
+-
+- .align 32
+-4: and %o5, FPRS_DU, %o5
+- jmpl %g7 + %g0, %g0
+- wr %o5, FPRS_FEF, %fprs
+--- a/arch/sparc/lib/ksyms.c
++++ b/arch/sparc/lib/ksyms.c
+@@ -126,10 +126,6 @@ EXPORT_SYMBOL(copy_user_page);
+ void VISenter(void);
+ EXPORT_SYMBOL(VISenter);
+
+-/* CRYPTO code needs this */
+-void VISenterhalf(void);
+-EXPORT_SYMBOL(VISenterhalf);
+-
+ extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *);
+ extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *,
+ unsigned long *);
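
The trap-nesting failure the second patch describes is easier to see in a toy
model.  The program below is a self-contained userspace C sketch, not kernel
code: the FPU is reduced to a four-entry integer array, %fprs dirty tracking
to a single flag, and the ETRAP/VIS_ENTRY/RTRAP sequence to straight-line
calls.  It only illustrates why saving just the "dirty" registers at an inner
trap level can lose live user state, and why unconditionally saving below the
top-most slot (the fix) cannot.

#include <stdio.h>
#include <string.h>

#define NREGS 4

/* Simulate one inner trap level using the FPU while user data is live. */
static void run(int save_all_below_top)
{
	int fpregs[NREGS] = { 1, 2, 3, 4 };	/* live user FPU data */
	int user_want[NREGS];			/* what userspace expects */
	int slot[NREGS];			/* inner trap's save slot */
	int fprs_dirty = 0;			/* dirty bits clear, data live */
	int saved = 0;

	memcpy(user_want, fpregs, sizeof(user_want));

	/* ETRAP, ETRAP: we are now at an inner (non-top-level) trap level. */

	/* VIS_ENTRY: kernel wants the FPU, so save the pre-trap state.
	   The buggy variant saves only what the dirty bits say is dirty. */
	if (fprs_dirty || save_all_below_top) {
		memcpy(slot, fpregs, sizeof(slot));
		saved = 1;
	}

	/* Kernel FPU-using code clobbers the registers. */
	memset(fpregs, 0, sizeof(fpregs));

	/* VIS_EXIT + RTRAP: restore only what was recorded as saved. */
	if (saved)
		memcpy(fpregs, slot, sizeof(fpregs));

	/* Second RTRAP heads back towards userspace. */
	printf("%s: user registers %s\n",
	       save_all_below_top ? "fixed" : "buggy",
	       memcmp(fpregs, user_want, sizeof(fpregs)) == 0 ?
	       "intact" : "corrupted");
}

int main(void)
{
	run(0);		/* old behaviour: dirty-only save -> corruption */
	run(1);		/* patched behaviour: always save below the top */
	return 0;
}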