From 8861249c740fc4af9ddc5aee321eafefb960d7c6 Mon Sep 17 00:00:00 2001
From: "megha.dey@linux.intel.com" <megha.dey@linux.intel.com>
Date: Wed, 2 Aug 2017 13:49:09 -0700
Subject: crypto: x86/sha1 - Fix reads beyond the number of blocks passed

From: megha.dey@linux.intel.com <megha.dey@linux.intel.com>

commit 8861249c740fc4af9ddc5aee321eafefb960d7c6 upstream.

It was reported that the sha1 AVX2 function (sha1_transform_avx2) is
reading ahead beyond its intended data, causing a crash if the next
block is beyond a page boundary:
http://marc.info/?l=linux-crypto-vger&m=149373371023377

This patch makes sure that there is no overflow for any buffer length.

It passes the tests written by Jan Stancek that revealed this problem:
https://github.com/jstancek/sha1-avx2-crash
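
For reference, the over-read can be caught by hashing data that ends
right at an unmapped page. A hypothetical sketch of that kind of
guard-page check (not Jan Stancek's actual test code; it leans on the
unmapped guard page that follows a vmalloc area and uses a single
length, whereas the real tests sweep many lengths and offsets):

  #include <linux/module.h>
  #include <linux/err.h>
  #include <linux/slab.h>
  #include <linux/string.h>
  #include <linux/vmalloc.h>
  #include <crypto/hash.h>
  #include <crypto/sha.h>

  static int __init sha1_guard_test_init(void)
  {
          size_t area = 16 * PAGE_SIZE;
          size_t len = 8 * SHA1_BLOCK_SIZE;  /* several blocks, so the glue
                                                code picks the AVX2 transform */
          u8 *buf = vmalloc(area);           /* an unmapped guard page follows */
          u8 digest[SHA1_DIGEST_SIZE];
          struct crypto_shash *tfm;
          struct shash_desc *desc;

          if (!buf)
                  return -ENOMEM;
          memset(buf, 0x41, area);

          tfm = crypto_alloc_shash("sha1", 0, 0);  /* resolves to sha1-avx2
                                                      when it is usable */
          if (IS_ERR(tfm)) {
                  vfree(buf);
                  return PTR_ERR(tfm);
          }

          desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
          if (!desc) {
                  crypto_free_shash(tfm);
                  vfree(buf);
                  return -ENOMEM;
          }
          desc->tfm = tfm;

          /* data ends at the last byte of the vmalloc area; a sha1-avx2
             that reads past 'len' faults in the guard page */
          crypto_shash_digest(desc, buf + area - len, len, digest);

          kfree(desc);
          crypto_free_shash(tfm);
          vfree(buf);
          return 0;
  }
  module_init(sha1_guard_test_init);

  static void __exit sha1_guard_test_exit(void) { }
  module_exit(sha1_guard_test_exit);
  MODULE_LICENSE("GPL");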

I have re-enabled sha1-avx2 by reverting commit
b82ce24426a4071da9529d726057e4e642948667

Fixes: b82ce24426a4 ("crypto: sha1-ssse3 - Disable avx2")
Originally-by: Ilya Albrekht <ilya.albrekht@intel.com>
Tested-by: Jan Stancek <jstancek@redhat.com>
Signed-off-by: Megha Dey <megha.dey@linux.intel.com>
Reported-by: Jan Stancek <jstancek@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/x86/crypto/sha1_avx2_x86_64_asm.S |   67 +++++++++++++++++----------------
 arch/x86/crypto/sha1_ssse3_glue.c      |    2
 2 files changed, 37 insertions(+), 32 deletions(-)
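
The hunks below drop the BUFFER_END bound and the K_BASE end-of-data
sentinel in favour of a block counter (BLOCKS_CTR) and a new ADD_IF_GE
macro, so the software-pipelined code no longer prefetches message data
for blocks it was never asked to process. In C terms the macro behaves
roughly like this sketch (illustrative only; the name add_if_ge just
mirrors the assembler macro added in the first hunk):

  /*
   * a = (b >= c) ? a + d : a
   *
   * Advance buffer pointer 'a' by 'd' bytes only while at least 'c'
   * blocks are still outstanding, so BUFFER_PTR/BUFFER_PTR2 are only
   * ever moved onto blocks the caller actually supplied.
   */
  static inline const unsigned char *add_if_ge(const unsigned char *a,
                                               unsigned long b,
                                               unsigned long c,
                                               unsigned long d)
  {
          return b >= c ? a + d : a;
  }

The end-of-buffer cmp/cmovae sequences then become a BLOCKS_CTR
decrement plus ADD_IF_GE, and the loop exits once the counter reaches
zero.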

--- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S
+++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
@@ -117,11 +117,10 @@
 .set T1, REG_T1
 .endm

-#define K_BASE %r8
 #define HASH_PTR %r9
+#define BLOCKS_CTR %r8
 #define BUFFER_PTR %r10
 #define BUFFER_PTR2 %r13
-#define BUFFER_END %r11

 #define PRECALC_BUF %r14
 #define WK_BUF %r15
@@ -205,14 +204,14 @@
 * blended AVX2 and ALU instruction scheduling
 * 1 vector iteration per 8 rounds
 */
- vmovdqu ((i * 2) + PRECALC_OFFSET)(BUFFER_PTR), W_TMP
+ vmovdqu (i * 2)(BUFFER_PTR), W_TMP
 .elseif ((i & 7) == 1)
- vinsertf128 $1, (((i-1) * 2)+PRECALC_OFFSET)(BUFFER_PTR2),\
+ vinsertf128 $1, ((i-1) * 2)(BUFFER_PTR2),\
 WY_TMP, WY_TMP
 .elseif ((i & 7) == 2)
 vpshufb YMM_SHUFB_BSWAP, WY_TMP, WY
 .elseif ((i & 7) == 4)
- vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
 .elseif ((i & 7) == 7)
 vmovdqu WY_TMP, PRECALC_WK(i&~7)

@@ -255,7 +254,7 @@
 vpxor WY, WY_TMP, WY_TMP
 .elseif ((i & 7) == 7)
 vpxor WY_TMP2, WY_TMP, WY
- vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
 vmovdqu WY_TMP, PRECALC_WK(i&~7)

 PRECALC_ROTATE_WY
@@ -291,7 +290,7 @@
 vpsrld $30, WY, WY
 vpor WY, WY_TMP, WY
 .elseif ((i & 7) == 7)
- vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
 vmovdqu WY_TMP, PRECALC_WK(i&~7)

 PRECALC_ROTATE_WY
@@ -446,6 +445,16 @@

 .endm

+/* Add constant only if (%2 > %3) condition met (uses RTA as temp)
+ * %1 + %2 >= %3 ? %4 : 0
+ */
+.macro ADD_IF_GE a, b, c, d
+ mov \a, RTA
+ add $\d, RTA
+ cmp $\c, \b
+ cmovge RTA, \a
+.endm
+
 /*
 * macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining
 */
@@ -463,13 +472,16 @@
 lea (2*4*80+32)(%rsp), WK_BUF

 # Precalc WK for first 2 blocks
- PRECALC_OFFSET = 0
+ ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 2, 64
 .set i, 0
 .rept 160
 PRECALC i
 .set i, i + 1
 .endr
- PRECALC_OFFSET = 128
+
+ /* Go to next block if needed */
+ ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 3, 128
+ ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128
 xchg WK_BUF, PRECALC_BUF

 .align 32
@@ -479,8 +491,8 @@ _loop:
 * we use K_BASE value as a signal of a last block,
 * it is set below by: cmovae BUFFER_PTR, K_BASE
 */
- cmp K_BASE, BUFFER_PTR
- jne _begin
+ test BLOCKS_CTR, BLOCKS_CTR
+ jnz _begin
 .align 32
 jmp _end
 .align 32
@@ -512,10 +524,10 @@ _loop0:
 .set j, j+2
 .endr

- add $(2*64), BUFFER_PTR /* move to next odd-64-byte block */
- cmp BUFFER_END, BUFFER_PTR /* is current block the last one? */
- cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */
-
+ /* Update Counter */
+ sub $1, BLOCKS_CTR
+ /* Move to the next block only if needed*/
+ ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 4, 128
 /*
 * rounds
 * 60,62,64,66,68
@@ -532,8 +544,8 @@ _loop0:
 UPDATE_HASH 12(HASH_PTR), D
 UPDATE_HASH 16(HASH_PTR), E

- cmp K_BASE, BUFFER_PTR /* is current block the last one? */
- je _loop
+ test BLOCKS_CTR, BLOCKS_CTR
+ jz _loop

 mov TB, B

@@ -575,10 +587,10 @@ _loop2:
 .set j, j+2
 .endr

- add $(2*64), BUFFER_PTR2 /* move to next even-64-byte block */
-
- cmp BUFFER_END, BUFFER_PTR2 /* is current block the last one */
- cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */
+ /* update counter */
+ sub $1, BLOCKS_CTR
+ /* Move to the next block only if needed*/
+ ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128

 jmp _loop3
 _loop3:
@@ -641,19 +653,12 @@ _loop3:

 avx2_zeroupper

- lea K_XMM_AR(%rip), K_BASE
-
+ /* Setup initial values */
 mov CTX, HASH_PTR
 mov BUF, BUFFER_PTR
- lea 64(BUF), BUFFER_PTR2
-
- shl $6, CNT /* mul by 64 */
- add BUF, CNT
- add $64, CNT
- mov CNT, BUFFER_END

- cmp BUFFER_END, BUFFER_PTR2
- cmovae K_BASE, BUFFER_PTR2
+ mov BUF, BUFFER_PTR2
+ mov CNT, BLOCKS_CTR

 xmm_mov BSWAP_SHUFB_CTL(%rip), YMM_SHUFB_BSWAP

--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -201,7 +201,7 @@ asmlinkage void sha1_transform_avx2(u32

 static bool avx2_usable(void)
 {
- if (false && avx_usable() && boot_cpu_has(X86_FEATURE_AVX2)
+ if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2)
 && boot_cpu_has(X86_FEATURE_BMI1)
 && boot_cpu_has(X86_FEATURE_BMI2))
 return true;