git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
3.10-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Thu, 13 Aug 2015 21:27:07 +0000 (14:27 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Thu, 13 Aug 2015 21:27:07 +0000 (14:27 -0700)
added patches:
sparc64-fix-fpu-register-corruption-with-aes-crypto-offload.patch
sparc64-fix-userspace-fpu-register-corruptions.patch

queue-3.10/series
queue-3.10/sparc64-fix-fpu-register-corruption-with-aes-crypto-offload.patch [new file with mode: 0644]
queue-3.10/sparc64-fix-userspace-fpu-register-corruptions.patch [new file with mode: 0644]

diff --git a/queue-3.10/series b/queue-3.10/series
index 1bd602ddfc0762627cdc8de9a0eb864e47fc56d9..828cbe74edfa50cc7ecd2903bf8279b14a9f9132 100644
--- a/queue-3.10/series
+++ b/queue-3.10/series
@@ -12,3 +12,5 @@ md-use-kzalloc-when-bitmap-is-disabled.patch
 ipmi-fix-timeout-calculation-when-bmc-is-disconnected.patch
 mfd-sm501-dbg_regs-attribute-must-be-read-only.patch
 perf-x86-amd-rework-amd-pmu-init-code.patch
+sparc64-fix-fpu-register-corruption-with-aes-crypto-offload.patch
+sparc64-fix-userspace-fpu-register-corruptions.patch
diff --git a/queue-3.10/sparc64-fix-fpu-register-corruption-with-aes-crypto-offload.patch b/queue-3.10/sparc64-fix-fpu-register-corruption-with-aes-crypto-offload.patch
new file mode 100644
index 0000000..19239dc
--- /dev/null
+++ b/queue-3.10/sparc64-fix-fpu-register-corruption-with-aes-crypto-offload.patch
@@ -0,0 +1,107 @@
+From foo@baz Thu Aug 13 14:26:49 PDT 2015
+From: "David S. Miller" <davem@davemloft.net>
+Date: Tue, 14 Oct 2014 19:37:58 -0700
+Subject: sparc64: Fix FPU register corruption with AES crypto offload.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Upstream commit f4da3628dc7c32a59d1fb7116bb042e6f436d611 ]
+
+The AES loops in arch/sparc/crypto/aes_glue.c use a scheme where the
+key material is preloaded into the FPU registers, and then we loop
+over and over doing the crypt operation, reusing those pre-cooked key
+registers.
+
+There are intervening blkcipher*() calls between the crypt operation
+calls, and those might perform memcpy() and thus also try to use the
+FPU.
+
+The sparc64 kernel FPU usage mechanism is designed to allow such
+recursive uses, but with a catch.
+
+There has to be a trap between the two FPU-using threads of control.
+
+The mechanism works by allocating a slot for FPU saving at trap time
+when the FPU is already in use by the kernel.  If, within the trap
+handler, we then try to use the FPU registers, the pre-trap FPU
+register state is saved into the slot, and at trap return time we
+notice this and restore the pre-trap FPU state.
+
+Over the long term there are various more involved ways we can make
+this work, but for a quick fix let's take advantage of the fact that
+the situation where this happens is very limited.
+
+All sparc64 chips that support the crypto instructions also use the
+Niagara4 memcpy routine, and that routine only uses the FPU for large
+copies where we can't get the source aligned properly to a multiple
+of 8 bytes.
+
+We look to see if the FPU is already in use in this context, and if so
+we use the non-large copy path which only uses integer registers.
+
+Furthermore, we limit this special logic to the case where we are
+doing a kernel copy, rather than a user copy.
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/visasm.h |    8 ++++++++
+ arch/sparc/lib/NG4memcpy.S      |   14 +++++++++++++-
+ 2 files changed, 21 insertions(+), 1 deletion(-)
+
+--- a/arch/sparc/include/asm/visasm.h
++++ b/arch/sparc/include/asm/visasm.h
+@@ -39,6 +39,14 @@
+ 297:  wr              %o5, FPRS_FEF, %fprs;           \
+ 298:
++#define VISEntryHalfFast(fail_label)                  \
++      rd              %fprs, %o5;                     \
++      andcc           %o5, FPRS_FEF, %g0;             \
++      be,pt           %icc, 297f;                     \
++       nop;                                           \
++      ba,a,pt         %xcc, fail_label;               \
++297:  wr              %o5, FPRS_FEF, %fprs;
++
+ #define VISExitHalf                                   \
+       wr              %o5, 0, %fprs;
+--- a/arch/sparc/lib/NG4memcpy.S
++++ b/arch/sparc/lib/NG4memcpy.S
+@@ -41,6 +41,10 @@
+ #endif
+ #endif
++#if !defined(EX_LD) && !defined(EX_ST)
++#define NON_USER_COPY
++#endif
++
+ #ifndef EX_LD
+ #define EX_LD(x)      x
+ #endif
+@@ -197,9 +201,13 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len
+        mov            EX_RETVAL(%o3), %o0
+ .Llarge_src_unaligned:
++#ifdef NON_USER_COPY
++      VISEntryHalfFast(.Lmedium_vis_entry_fail)
++#else
++      VISEntryHalf
++#endif
+       andn            %o2, 0x3f, %o4
+       sub             %o2, %o4, %o2
+-      VISEntryHalf
+       alignaddr       %o1, %g0, %g1
+       add             %o1, %o4, %o1
+       EX_LD(LOAD(ldd, %g1 + 0x00, %f0))
+@@ -240,6 +248,10 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len
+        nop
+       ba,a,pt         %icc, .Lmedium_unaligned
++#ifdef NON_USER_COPY
++.Lmedium_vis_entry_fail:
++       or             %o0, %o1, %g2
++#endif
+ .Lmedium:
+       LOAD(prefetch, %o1 + 0x40, #n_reads_strong)
+       andcc           %g2, 0x7, %g0
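The logic added by this patch is compact in assembler; the following C sketch restates its shape for readability. It is an illustrative model only, not kernel code: fpu_in_use, the helper names, and the 64-byte threshold are invented stand-ins, and the real routine's source-alignment checks are omitted. The point it demonstrates is the fix: a kernel-internal copy that finds the FPU already loaded, e.g. with pre-cooked AES key registers, falls back to the integer-register medium-copy path instead of entering VIS.

    #include <stdbool.h>
    #include <stddef.h>
    #include <string.h>

    static bool fpu_in_use;     /* stands in for FPRS_FEF being set */

    static void copy_large_with_fpu(void *dst, const void *src, size_t len)
    {
            fpu_in_use = true;      /* VISEntryHalf: claim the FPU */
            memcpy(dst, src, len);  /* stands in for the VIS block-copy loop */
            fpu_in_use = false;     /* VISExitHalf: release it */
    }

    static void copy_medium_integer(void *dst, const void *src, size_t len)
    {
            memcpy(dst, src, len);  /* integer registers only, FPU untouched */
    }

    void ng4memcpy_model(void *dst, const void *src, size_t len)
    {
            /* VISEntryHalfFast: take the FPU path only if nobody already
             * owns the FPU; otherwise branch to .Lmedium_vis_entry_fail. */
            if (len >= 64 && !fpu_in_use)
                    copy_large_with_fpu(dst, src, len);
            else
                    copy_medium_integer(dst, src, len);
    }

For user copies, EX_LD/EX_ST are defined, NON_USER_COPY is not, and the plain VISEntryHalf path is retained, matching the #ifdef guards in the hunks above.
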
diff --git a/queue-3.10/sparc64-fix-userspace-fpu-register-corruptions.patch b/queue-3.10/sparc64-fix-userspace-fpu-register-corruptions.patch
new file mode 100644
index 0000000..707a83f
--- /dev/null
+++ b/queue-3.10/sparc64-fix-userspace-fpu-register-corruptions.patch
@@ -0,0 +1,196 @@
+From foo@baz Thu Aug 13 14:26:49 PDT 2015
+From: "David S. Miller" <davem@davemloft.net>
+Date: Thu, 6 Aug 2015 19:13:25 -0700
+Subject: sparc64: Fix userspace FPU register corruptions.
+
+From: "David S. Miller" <davem@davemloft.net>
+
+[ Upstream commit 44922150d87cef616fd183220d43d8fde4d41390 ]
+
+If we have a series of events from userspace, with %fprs=FPRS_FEF,
+as follows:
+
+ETRAP
+       ETRAP
+               VIS_ENTRY(fprs=0x4)
+               VIS_EXIT
+               RTRAP (kernel FPU restore with fpu_saved=0x4)
+       RTRAP
+
+We will not restore the user registers that were clobbered by the
+FPU-using kernel code in the inner-most trap.
+
+Traps allocate FPU save slots in the thread struct, and FPU-using
+sequences save only the "dirty" FPU registers.
+
+This works at the initial trap level because all of the registers
+get recorded into the top-level FPU save area, and we'll return
+to userspace with the FPU disabled so that any FPU use by the user
+will take an FPU disabled trap wherein we'll load the registers
+back up properly.
+
+But this is not how trap returns from kernel to kernel operate.
+
+The simplest fix for this bug is to always save all FPU register state
+for anything other than the top-most FPU save area.
+
+Getting rid of the optimized inner-slot FPU saving code ends up
+making VISEntryHalf degenerate into plain VISEntry.
+
+Longer term we need to do something smarter to reinstate the partial
+save optimizations.  Perhaps the fundamental error is having trap
+entry and exit allocate FPU save slots and restore register state.
+Instead, the VISEntry et al. calls should be doing that work.
+
+This bug is about two decades old.
+
+Reported-by: James Y Knight <jyknight@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sparc/include/asm/visasm.h |   16 ++-------
+ arch/sparc/lib/NG4memcpy.S      |    5 ++
+ arch/sparc/lib/VISsave.S        |   67 +---------------------------------------
+ arch/sparc/lib/ksyms.c          |    4 --
+ 4 files changed, 11 insertions(+), 81 deletions(-)
+
+--- a/arch/sparc/include/asm/visasm.h
++++ b/arch/sparc/include/asm/visasm.h
+@@ -28,16 +28,10 @@
+  * Must preserve %o5 between VISEntryHalf and VISExitHalf */
+ #define VISEntryHalf                                  \
+-      rd              %fprs, %o5;                     \
+-      andcc           %o5, FPRS_FEF, %g0;             \
+-      be,pt           %icc, 297f;                     \
+-       sethi          %hi(298f), %g7;                 \
+-      sethi           %hi(VISenterhalf), %g1;         \
+-      jmpl            %g1 + %lo(VISenterhalf), %g0;   \
+-       or             %g7, %lo(298f), %g7;            \
+-      clr             %o5;                            \
+-297:  wr              %o5, FPRS_FEF, %fprs;           \
+-298:
++      VISEntry
++
++#define VISExitHalf                                   \
++      VISExit
+ #define VISEntryHalfFast(fail_label)                  \
+       rd              %fprs, %o5;                     \
+@@ -47,7 +41,7 @@
+       ba,a,pt         %xcc, fail_label;               \
+ 297:  wr              %o5, FPRS_FEF, %fprs;
+-#define VISExitHalf                                   \
++#define VISExitHalfFast                                       \
+       wr              %o5, 0, %fprs;
+ #ifndef __ASSEMBLY__
+--- a/arch/sparc/lib/NG4memcpy.S
++++ b/arch/sparc/lib/NG4memcpy.S
+@@ -240,8 +240,11 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len
+       add             %o0, 0x40, %o0
+       bne,pt          %icc, 1b
+        LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
++#ifdef NON_USER_COPY
++      VISExitHalfFast
++#else
+       VISExitHalf
+-
++#endif
+       brz,pn          %o2, .Lexit
+        cmp            %o2, 19
+       ble,pn          %icc, .Lsmall_unaligned
+--- a/arch/sparc/lib/VISsave.S
++++ b/arch/sparc/lib/VISsave.S
+@@ -44,9 +44,8 @@ vis1:        ldub            [%g6 + TI_FPSAVED], %g3
+        stx            %g3, [%g6 + TI_GSR]
+ 2:    add             %g6, %g1, %g3
+-      cmp             %o5, FPRS_DU
+-      be,pn           %icc, 6f
+-       sll            %g1, 3, %g1
++      mov             FPRS_DU | FPRS_DL | FPRS_FEF, %o5
++      sll             %g1, 3, %g1
+       stb             %o5, [%g3 + TI_FPSAVED]
+       rd              %gsr, %g2
+       add             %g6, %g1, %g3
+@@ -80,65 +79,3 @@ vis1:       ldub            [%g6 + TI_FPSAVED], %g3
+       .align          32
+ 80:   jmpl            %g7 + %g0, %g0
+        nop
+-
+-6:    ldub            [%g3 + TI_FPSAVED], %o5
+-      or              %o5, FPRS_DU, %o5
+-      add             %g6, TI_FPREGS+0x80, %g2
+-      stb             %o5, [%g3 + TI_FPSAVED]
+-
+-      sll             %g1, 5, %g1
+-      add             %g6, TI_FPREGS+0xc0, %g3
+-      wr              %g0, FPRS_FEF, %fprs
+-      membar          #Sync
+-      stda            %f32, [%g2 + %g1] ASI_BLK_P
+-      stda            %f48, [%g3 + %g1] ASI_BLK_P
+-      membar          #Sync
+-      ba,pt           %xcc, 80f
+-       nop
+-
+-      .align          32
+-80:   jmpl            %g7 + %g0, %g0
+-       nop
+-
+-      .align          32
+-VISenterhalf:
+-      ldub            [%g6 + TI_FPDEPTH], %g1
+-      brnz,a,pn       %g1, 1f
+-       cmp            %g1, 1
+-      stb             %g0, [%g6 + TI_FPSAVED]
+-      stx             %fsr, [%g6 + TI_XFSR]
+-      clr             %o5
+-      jmpl            %g7 + %g0, %g0
+-       wr             %g0, FPRS_FEF, %fprs
+-
+-1:    bne,pn          %icc, 2f
+-       srl            %g1, 1, %g1
+-      ba,pt           %xcc, vis1
+-       sub            %g7, 8, %g7
+-2:    addcc           %g6, %g1, %g3
+-      sll             %g1, 3, %g1
+-      andn            %o5, FPRS_DU, %g2
+-      stb             %g2, [%g3 + TI_FPSAVED]
+-
+-      rd              %gsr, %g2
+-      add             %g6, %g1, %g3
+-      stx             %g2, [%g3 + TI_GSR]
+-      add             %g6, %g1, %g2
+-      stx             %fsr, [%g2 + TI_XFSR]
+-      sll             %g1, 5, %g1
+-3:    andcc           %o5, FPRS_DL, %g0
+-      be,pn           %icc, 4f
+-       add            %g6, TI_FPREGS, %g2
+-
+-      add             %g6, TI_FPREGS+0x40, %g3
+-      membar          #Sync
+-      stda            %f0, [%g2 + %g1] ASI_BLK_P
+-      stda            %f16, [%g3 + %g1] ASI_BLK_P
+-      membar          #Sync
+-      ba,pt           %xcc, 4f
+-       nop
+-
+-      .align          32
+-4:    and             %o5, FPRS_DU, %o5
+-      jmpl            %g7 + %g0, %g0
+-       wr             %o5, FPRS_FEF, %fprs
+--- a/arch/sparc/lib/ksyms.c
++++ b/arch/sparc/lib/ksyms.c
+@@ -126,10 +126,6 @@ EXPORT_SYMBOL(copy_user_page);
+ void VISenter(void);
+ EXPORT_SYMBOL(VISenter);
+-/* CRYPTO code needs this */
+-void VISenterhalf(void);
+-EXPORT_SYMBOL(VISenterhalf);
+-
+ extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *);
+ extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *,
+               unsigned long *);
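
The hazard this second changelog describes can be modelled in a few lines of C. This is a deliberately simplified, illustrative sketch, not kernel code: the register file, slot array, and function names are all invented, and the real slots live in the thread struct keyed by trap depth. It shows why a nested save slot that records only the to-be-dirtied registers is lossy once the actual clobber set is larger: the restore brings back only what was saved. Recording the full set for every non-top-level slot, which is what making VISEntryHalf degenerate into VISEntry accomplishes, makes the restore complete.

    #include <stdio.h>
    #include <string.h>

    #define NREGS 8                 /* toy register file; the real one is larger */

    static int fpu[NREGS];          /* live FPU registers                     */
    static int slot[4][NREGS];      /* per-trap-depth save slots              */
    static int saved[4];            /* how many registers each slot recorded  */

    /* Save into a nested slot.  The buggy behaviour passes only the
     * number of registers about to be dirtied; the fix passes NREGS. */
    static void save_slot(int depth, int nregs)
    {
            memcpy(slot[depth], fpu, sizeof(int) * nregs);
            saved[depth] = nregs;
    }

    /* Trap return: only what was recorded comes back; any other
     * register keeps whatever the kernel left in it. */
    static void restore_slot(int depth)
    {
            memcpy(fpu, slot[depth], sizeof(int) * saved[depth]);
    }

    int main(void)
    {
            int i;

            for (i = 0; i < NREGS; i++)     /* userspace's live values     */
                    fpu[i] = 100 + i;

            save_slot(1, NREGS / 2);        /* buggy: partial nested save  */
            /* save_slot(1, NREGS);            fixed: save everything      */

            for (i = 0; i < NREGS; i++)     /* kernel FPU use clobbers all */
                    fpu[i] = -1;
            restore_slot(1);

            for (i = 0; i < NREGS; i++)     /* f4..f7 print -1: corrupted  */
                    printf("f%d = %d\n", i, fpu[i]);
            return 0;
    }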