From 8861249c740fc4af9ddc5aee321eafefb960d7c6 Mon Sep 17 00:00:00 2001
From: "megha.dey@linux.intel.com" <megha.dey@linux.intel.com>
Date: Wed, 2 Aug 2017 13:49:09 -0700
Subject: crypto: x86/sha1 - Fix reads beyond the number of blocks passed

From: megha.dey@linux.intel.com <megha.dey@linux.intel.com>

commit 8861249c740fc4af9ddc5aee321eafefb960d7c6 upstream.
It was reported that the sha1 AVX2 function (sha1_transform_avx2) reads
ahead beyond its intended data, causing a crash if the next block is
beyond a page boundary:
http://marc.info/?l=linux-crypto-vger&m=149373371023377
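
To illustrate the failure mode, here is a minimal userspace sketch (an
illustrative stand-in, not the kernel code path and not Jan Stancek's
actual test): the message is laid out so that it ends right before an
unmapped page, and a helper that unconditionally touches two 64-byte
blocks per iteration faults as soon as it reads past the requested
block count.

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

/* Stand-in for the buggy read-ahead: always touches two 64-byte blocks
 * per iteration, even when only one block was requested. */
static uint32_t consume_two_blocks_at_a_time(const uint8_t *buf, size_t blocks)
{
	uint32_t sum = 0;
	size_t i, j;

	for (i = 0; i < blocks; i += 2)
		for (j = 0; j < 128; j++)	/* reads block i and i+1 unconditionally */
			sum += buf[i * 64 + j];
	return sum;
}

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	/* Two pages: the first readable, the second inaccessible. */
	uint8_t *map = mmap(NULL, 2 * page, PROT_READ | PROT_WRITE,
			    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (map == MAP_FAILED)
		return 1;
	mprotect(map + page, page, PROT_NONE);

	/* One 64-byte block ending exactly at the page boundary. */
	memset(map + page - 64, 0xab, 64);

	/* Asking for 1 block still reads 128 bytes -> SIGSEGV on the
	 * PROT_NONE page, mirroring the reported crash. */
	printf("%u\n", consume_two_blocks_at_a_time(map + page - 64, 1));
	return 0;
}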
This patch makes sure that there is no read beyond the supplied data
for any buffer length: the BUFFER_END pointer comparison is replaced
with a block counter (BLOCKS_CTR), and the buffer pointers are only
advanced to the next block when enough blocks remain to be processed.
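
The counter-gated pointer advance can be modelled roughly in C as
below. This is only a loose, single-block sketch: the real assembly
software-pipelines two blocks at once (BUFFER_PTR/BUFFER_PTR2) and uses
ADD_IF_GE with the operands visible in the diff; the helper name
add_if_ge and the simplified loop are illustrative, not the actual
code.

#include <stddef.h>
#include <stdint.h>

/* The ADD_IF_GE idea: advance a pointer by 'step' bytes only while at
 * least 'min_blocks' blocks are still left, so it never walks past the
 * data the caller actually supplied. */
static inline const uint8_t *add_if_ge(const uint8_t *ptr, size_t blocks_ctr,
				       size_t min_blocks, size_t step)
{
	return blocks_ctr >= min_blocks ? ptr + step : ptr;
}

/* Greatly simplified, single-block model of the counter-driven loop. */
static void transform_model(const uint8_t *buf, size_t blocks_ctr)
{
	const uint8_t *buffer_ptr = buf;

	while (blocks_ctr) {
		/* ... hash the 64-byte block at buffer_ptr ... */

		blocks_ctr--;	/* the "Update Counter" step in the patch */
		/* Move to the next block only if more blocks remain,
		 * replacing the old cmp BUFFER_END / cmovae sequence. */
		buffer_ptr = add_if_ge(buffer_ptr, blocks_ctr, 1, 64);
	}
}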
It passes the tests written by Jan Stancek that revealed this problem:
https://github.com/jstancek/sha1-avx2-crash

I have re-enabled sha1-avx2 by reverting commit
b82ce24426a4071da9529d726057e4e642948667

Fixes: b82ce24426a4 ("crypto: sha1-ssse3 - Disable avx2")
Originally-by: Ilya Albrekht <ilya.albrekht@intel.com>
Tested-by: Jan Stancek <jstancek@redhat.com>
Signed-off-by: Megha Dey <megha.dey@linux.intel.com>
Reported-by: Jan Stancek <jstancek@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/crypto/sha1_avx2_x86_64_asm.S | 67 +++++++++++++++++----------------
 arch/x86/crypto/sha1_ssse3_glue.c      |  2 +-
 2 files changed, 37 insertions(+), 32 deletions(-)

--- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S
+++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
+#define BLOCKS_CTR %r8
 #define BUFFER_PTR %r10
 #define BUFFER_PTR2 %r13
-#define BUFFER_END %r11
 #define PRECALC_BUF %r14
 * blended AVX2 and ALU instruction scheduling
 * 1 vector iteration per 8 rounds
- vmovdqu ((i * 2) + PRECALC_OFFSET)(BUFFER_PTR), W_TMP
+ vmovdqu (i * 2)(BUFFER_PTR), W_TMP
 .elseif ((i & 7) == 1)
- vinsertf128 $1, (((i-1) * 2)+PRECALC_OFFSET)(BUFFER_PTR2),\
+ vinsertf128 $1, ((i-1) * 2)(BUFFER_PTR2),\
 .elseif ((i & 7) == 2)
 vpshufb YMM_SHUFB_BSWAP, WY_TMP, WY
 .elseif ((i & 7) == 4)
- vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
 .elseif ((i & 7) == 7)
 vmovdqu WY_TMP, PRECALC_WK(i&~7)
 vpxor WY, WY_TMP, WY_TMP
 .elseif ((i & 7) == 7)
 vpxor WY_TMP2, WY_TMP, WY
- vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
 vmovdqu WY_TMP, PRECALC_WK(i&~7)
 .elseif ((i & 7) == 7)
- vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
 vmovdqu WY_TMP, PRECALC_WK(i&~7)
+/*
+ * Add constant %4 to %1 only if the (%2 >= %3) condition is met (uses RTA as temp)
+ * %1 += (%2 >= %3) ? %4 : 0
+ */
+.macro ADD_IF_GE a, b, c, d
 * macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining
@@ -463,13 +472,16 @@
 lea (2*4*80+32)(%rsp), WK_BUF
 # Precalc WK for first 2 blocks
+ ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 2, 64
- PRECALC_OFFSET = 128
+ /* Go to next block if needed */
+ ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 3, 128
+ ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128
 xchg WK_BUF, PRECALC_BUF
@@ -479,8 +491,8 @@ _loop:
 * we use K_BASE value as a signal of a last block,
 * it is set below by: cmovae BUFFER_PTR, K_BASE
- cmp K_BASE, BUFFER_PTR
+ test BLOCKS_CTR, BLOCKS_CTR
@@ -512,10 +524,10 @@ _loop0:
- add $(2*64), BUFFER_PTR /* move to next odd-64-byte block */
- cmp BUFFER_END, BUFFER_PTR /* is current block the last one? */
- cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */
+ /* Update Counter */
+ /* Move to the next block only if needed */
+ ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 4, 128
@@ -532,8 +544,8 @@ _loop0:
 UPDATE_HASH 12(HASH_PTR), D
 UPDATE_HASH 16(HASH_PTR), E
- cmp K_BASE, BUFFER_PTR /* is current block the last one? */
+ test BLOCKS_CTR, BLOCKS_CTR
@@ -575,10 +587,10 @@ _loop2:
- add $(2*64), BUFFER_PTR2 /* move to next even-64-byte block */
- cmp BUFFER_END, BUFFER_PTR2 /* is current block the last one */
- cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */
+ /* update counter */
+ /* Move to the next block only if needed */
+ ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128
@@ -641,19 +653,12 @@ _loop3:
- lea K_XMM_AR(%rip), K_BASE
+ /* Setup initial values */
- lea 64(BUF), BUFFER_PTR2
- shl $6, CNT /* mul by 64 */
- mov CNT, BUFFER_END
- cmp BUFFER_END, BUFFER_PTR2
- cmovae K_BASE, BUFFER_PTR2
+ mov BUF, BUFFER_PTR2
+ mov CNT, BLOCKS_CTR
 xmm_mov BSWAP_SHUFB_CTL(%rip), YMM_SHUFB_BSWAP

--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -201,7 +201,7 @@ asmlinkage void sha1_transform_avx2(u32
 static bool avx2_usable(void)
- if (false && avx_usable() && boot_cpu_has(X86_FEATURE_AVX2)
+ if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2)
 && boot_cpu_has(X86_FEATURE_BMI1)
 && boot_cpu_has(X86_FEATURE_BMI2))