]>
Commit | Line | Data |
---|---|---|
56676f87 AP |
1 | #!/usr/bin/env perl |
2 | # Copyright 2017 The OpenSSL Project Authors. All Rights Reserved. | |
3 | # | |
4 | # Licensed under the OpenSSL license (the "License"). You may not use | |
5 | # this file except in compliance with the License. You can obtain a copy | |
6 | # in the file LICENSE in the source distribution or at | |
7 | # https://www.openssl.org/source/license.html | |
8 | # | |
9 | # ==================================================================== | |
10 | # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL | |
11 | # project. The module is, however, dual licensed under OpenSSL and | |
12 | # CRYPTOGAMS licenses depending on where you obtain it. For further | |
13 | # details see http://www.openssl.org/~appro/cryptogams/. | |
14 | # ==================================================================== | |
15 | # | |
16 | # Keccak-1600 for ARMv4. | |
17 | # | |
18 | # June 2017. | |
19 | # | |
20 | # This is KECCAK_1X variant (see keccak1600.c) with bit interleaving. | |
21 | # How does it compare to Keccak Code Package? It's as fast, but several | |
22 | # times smaller, and is endian- and ISA-neutral. ISA neutrality means | |
23 | # that minimum ISA requirement is ARMv4, yet it can be assembled even | |
24 | # as ARMv7 Thumb-2. | |
25 | # | |
26 | ######################################################################## | |
27 | # Numbers are cycles per processed byte accounting even for input bit | |
28 | # interleaving. | |
29 | # | |
30 | # r=1600(*) r=1024 | |
31 | # | |
32 | # Cortex-A7 71/+180% 103 | |
33 | # Cortex-A8 48/+290% 69 | |
34 | # Cortex-A15 34/+210% 49 | |
35 | # | |
36 | # (*) Not used in real life, meaningful as estimate for single sponge | |
37 | # operation performance. Numbers after slash are improvement over | |
38 | # compiler-generated KECCAK_1X reference code. | |
39 | ||
aabfd329 AP |
# Register aliases used throughout the generated assembly.
#   @C - r0..r9, loaded and stored as a group ("ldmia sp,{@C[0]-@C[9]}"),
#        i.e. ten 32-bit words = five 64-bit lanes at a time.
#   @E - r10, r11, r12 and r14; r13 is skipped because it is the stack
#        pointer.
my @C = map { 'r' . $_ } 0 .. 9;
my @E = map { 'r' . $_ } 10 .. 12, 14;
42 | ||
56676f87 AP |
43 | ######################################################################## |
44 | # Stack layout | |
45 | # ----->+-----------------------+ | |
46 | # | uint64_t A[5][5] | | |
47 | # | ... | | |
48 | # +200->+-----------------------+ | |
49 | # | uint64_t D[5] | | |
50 | # | ... | | |
51 | # +240->+-----------------------+ | |
52 | # | uint64_t T[2][5] | | |
53 | # | ... | | |
54 | # +320->+-----------------------+ | |
55 | # | saved lr | | |
56 | # +324->+-----------------------+ | |
57 | # | loop counter | | |
58 | # +328->+-----------------------+ | |
59 | # | ... | |
60 | ||
aabfd329 AP |
# Byte offsets from sp of the 64-bit slots described in the stack layout
# above: A[5][5] occupies slots 0..24, D[5] slots 25..29 and T[2][5] slots
# 30..39.  Each slot is 8 bytes wide; a row is five consecutive slots.
my @A = map { my $first = $_; [ map { ($first + $_) * 8 } 0 .. 4 ] } 0, 5, 10, 15, 20;
my @D = map { $_ * 8 } 25 .. 29;
my @T = map { my $first = $_; [ map { ($first + $_) * 8 } 0 .. 4 ] } 30, 35;
64 | ||
# Assembly for the permutation core, emitted as a single heredoc:
#   iotas             - 24 round constants, each stored as a pair of 32-bit
#                       words (read in the round as iotas[i].lo / iotas[i].hi).
#   KeccakF1600_int   - entry for callers whose state is already on the
#                       stack: loads A[0][0..4] into @C, points @E[0] at
#                       A[1][0] and falls through to KeccakF1600_enter.
#   KeccakF1600_enter - saves lr at sp+320, zeroes the loop counter at
#                       sp+324 and branches into the round body.
#   .Lround           - one round per pass.  The counter advances by 8 (one
#                       iotas entry) and the loop stops when it reaches 192.
#                       The flags from "cmp ...,#192" are consumed only by
#                       the closing "blo .Lround"; no instruction in between
#                       sets flags, and that must stay true.
#   KeccakF1600       - copies A[5][5] from the pointer in r0 to the stack,
#                       calls KeccakF1600_enter, then copies the state back
#                       through the r0 value saved by the prologue.
# 64-bit rotations appear as two 32-bit "ror" instructions because the lanes
# are kept bit-interleaved (see the header comment of this file).
65 | $code.=<<___; | |
66 | .text | |
67 | ||
56676f87 AP |
68 | #if defined(__thumb2__) |
69 | .syntax unified | |
70 | .thumb | |
71 | #else | |
72 | .code 32 | |
73 | #endif | |
74 | ||
aabfd329 AP |
75 | .type iotas,%object |
76 | .align 5 | |
77 | iotas: | |
78 | .long 0x00000001, 0x00000000 | |
79 | .long 0x00000000, 0x00000089 | |
80 | .long 0x00000000, 0x8000008b | |
81 | .long 0x00000000, 0x80008080 | |
82 | .long 0x00000001, 0x0000008b | |
83 | .long 0x00000001, 0x00008000 | |
84 | .long 0x00000001, 0x80008088 | |
85 | .long 0x00000001, 0x80000082 | |
86 | .long 0x00000000, 0x0000000b | |
87 | .long 0x00000000, 0x0000000a | |
88 | .long 0x00000001, 0x00008082 | |
89 | .long 0x00000000, 0x00008003 | |
90 | .long 0x00000001, 0x0000808b | |
91 | .long 0x00000001, 0x8000000b | |
92 | .long 0x00000001, 0x8000008a | |
93 | .long 0x00000001, 0x80000081 | |
94 | .long 0x00000000, 0x80000081 | |
95 | .long 0x00000000, 0x80000008 | |
96 | .long 0x00000000, 0x00000083 | |
97 | .long 0x00000000, 0x80008003 | |
98 | .long 0x00000001, 0x80008088 | |
99 | .long 0x00000000, 0x80000088 | |
100 | .long 0x00000001, 0x00008000 | |
101 | .long 0x00000000, 0x80008082 | |
56676f87 | 102 | .size iotas,.-iotas |
aabfd329 | 103 | |
56676f87 | 104 | .type KeccakF1600_int, %function |
aabfd329 | 105 | .align 5 |
56676f87 AP |
106 | KeccakF1600_int: |
107 | ldmia sp,{@C[0]-@C[9]} @ A[0][0..4] | |
aabfd329 | 108 | add @E[0],sp,#$A[1][0] |
56676f87 AP |
109 | KeccakF1600_enter: |
110 | str lr,[sp,#320] | |
111 | eor @E[1],@E[1],@E[1] | |
112 | str @E[1],[sp,#324] | |
113 | b .Lround_enter | |
aabfd329 AP |
114 | |
115 | .align 4 | |
116 | .Lround: | |
56676f87 AP |
117 | ldmia sp,{@C[0]-@C[9]} @ A[0][0..4] |
118 | .Lround_enter: | |
aabfd329 AP |
119 | ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[1][0..1] |
120 | eor @C[0],@C[0],@E[0] | |
121 | add @E[0],sp,#$A[1][2] | |
122 | eor @C[1],@C[1],@E[1] | |
123 | eor @C[2],@C[2],@E[2] | |
124 | eor @C[3],@C[3],@E[3] | |
125 | ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[1][2..3] | |
126 | eor @C[4],@C[4],@E[0] | |
127 | add @E[0],sp,#$A[1][4] | |
128 | eor @C[5],@C[5],@E[1] | |
129 | eor @C[6],@C[6],@E[2] | |
130 | eor @C[7],@C[7],@E[3] | |
131 | ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[1][4]..A[2][0] | |
132 | eor @C[8],@C[8],@E[0] | |
133 | add @E[0],sp,#$A[2][1] | |
134 | eor @C[9],@C[9],@E[1] | |
135 | eor @C[0],@C[0],@E[2] | |
136 | eor @C[1],@C[1],@E[3] | |
137 | ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[2][1..2] | |
138 | eor @C[2],@C[2],@E[0] | |
139 | add @E[0],sp,#$A[2][3] | |
140 | eor @C[3],@C[3],@E[1] | |
141 | eor @C[4],@C[4],@E[2] | |
142 | eor @C[5],@C[5],@E[3] | |
143 | ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[2][3..4] | |
144 | eor @C[6],@C[6],@E[0] | |
145 | add @E[0],sp,#$A[3][0] | |
146 | eor @C[7],@C[7],@E[1] | |
147 | eor @C[8],@C[8],@E[2] | |
148 | eor @C[9],@C[9],@E[3] | |
149 | ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[3][0..1] | |
150 | eor @C[0],@C[0],@E[0] | |
151 | add @E[0],sp,#$A[3][2] | |
152 | eor @C[1],@C[1],@E[1] | |
153 | eor @C[2],@C[2],@E[2] | |
154 | eor @C[3],@C[3],@E[3] | |
155 | ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[3][2..3] | |
156 | eor @C[4],@C[4],@E[0] | |
157 | add @E[0],sp,#$A[3][4] | |
158 | eor @C[5],@C[5],@E[1] | |
159 | eor @C[6],@C[6],@E[2] | |
160 | eor @C[7],@C[7],@E[3] | |
161 | ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[3][4]..A[4][0] | |
162 | eor @C[8],@C[8],@E[0] | |
163 | add @E[0],sp,#$A[4][1] | |
164 | eor @C[9],@C[9],@E[1] | |
165 | eor @C[0],@C[0],@E[2] | |
166 | eor @C[1],@C[1],@E[3] | |
167 | ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[4][1..2] | |
168 | eor @C[2],@C[2],@E[0] | |
169 | add @E[0],sp,#$A[4][3] | |
170 | eor @C[3],@C[3],@E[1] | |
171 | eor @C[4],@C[4],@E[2] | |
172 | eor @C[5],@C[5],@E[3] | |
173 | ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[4][3..4] | |
174 | eor @C[6],@C[6],@E[0] | |
175 | eor @C[7],@C[7],@E[1] | |
176 | eor @C[8],@C[8],@E[2] | |
177 | eor @C[9],@C[9],@E[3] | |
178 | ||
179 | eor @E[0],@C[0],@C[5],ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0]; | |
180 | eor @E[1],@C[1],@C[4] | |
181 | str @E[0],[sp,#$D[1]] @ D[1] = E[0] | |
182 | eor @E[2],@C[6],@C[1],ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3]; | |
183 | str @E[1],[sp,#$D[1]+4] | |
184 | eor @E[3],@C[7],@C[0] | |
185 | str @E[2],[sp,#$D[4]] @ D[4] = E[1] | |
186 | eor @C[0],@C[8],@C[3],ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4]; | |
187 | str @E[3],[sp,#$D[4]+4] | |
188 | eor @C[1],@C[9],@C[2] | |
189 | str @C[0],[sp,#$D[0]] @ D[0] = C[0] | |
190 | eor @C[2],@C[2],@C[7],ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1]; | |
191 | str @C[1],[sp,#$D[0]+4] | |
192 | eor @C[3],@C[3],@C[6] | |
193 | str @C[2],[sp,#$D[2]] @ D[2] = C[1] | |
194 | eor @C[4],@C[4],@C[9],ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2]; | |
195 | str @C[3],[sp,#$D[2]+4] | |
196 | eor @C[5],@C[5],@C[8] | |
56676f87 AP |
197 | ldr @C[8],[sp,#$A[3][0]] |
198 | ldr @C[9],[sp,#$A[3][0]+4] | |
aabfd329 AP |
199 | str @C[4],[sp,#$D[3]] @ D[3] = C[2] |
200 | str @C[5],[sp,#$D[3]+4] | |
201 | ||
53718107 | 202 | ldr @C[6],[sp,#$A[0][1]] |
aabfd329 | 203 | eor @C[8],@C[8],@C[0] |
56676f87 | 204 | ldr @C[7],[sp,#$A[0][1]+4] |
aabfd329 AP |
205 | eor @C[9],@C[9],@C[1] |
206 | str @C[8],[sp,#$T[0][0]] @ T[0][0] = A[3][0] ^ C[0]; /* borrow T[0][0] */ | |
53718107 | 207 | ldr @C[8],[sp,#$A[0][2]] |
aabfd329 | 208 | str @C[9],[sp,#$T[0][0]+4] |
53718107 | 209 | ldr @C[9],[sp,#$A[0][2]+4] |
aabfd329 AP |
210 | eor @C[6],@C[6],@E[0] |
211 | eor @C[7],@C[7],@E[1] | |
212 | str @C[6],[sp,#$T[0][1]] @ T[0][1] = A[0][1] ^ E[0]; /* D[1] */ | |
53718107 | 213 | ldr @C[6],[sp,#$A[0][3]] |
aabfd329 | 214 | str @C[7],[sp,#$T[0][1]+4] |
53718107 | 215 | ldr @C[7],[sp,#$A[0][3]+4] |
aabfd329 AP |
216 | eor @C[8],@C[8],@C[2] |
217 | eor @C[9],@C[9],@C[3] | |
218 | str @C[8],[sp,#$T[0][2]] @ T[0][2] = A[0][2] ^ C[1]; /* D[2] */ | |
53718107 | 219 | ldr @C[8],[sp,#$A[0][4]] |
aabfd329 | 220 | str @C[9],[sp,#$T[0][2]+4] |
53718107 | 221 | ldr @C[9],[sp,#$A[0][4]+4] |
aabfd329 AP |
222 | eor @C[6],@C[6],@C[4] |
223 | eor @C[7],@C[7],@C[5] | |
224 | str @C[6],[sp,#$T[0][3]] @ T[0][3] = A[0][3] ^ C[2]; /* D[3] */ | |
aabfd329 | 225 | eor @C[8],@C[8],@E[2] |
53718107 | 226 | str @C[7],[sp,#$T[0][3]+4] |
aabfd329 | 227 | eor @C[9],@C[9],@E[3] |
56676f87 AP |
228 | ldr @C[6],[sp,#$A[3][3]] |
229 | ldr @C[7],[sp,#$A[3][3]+4] | |
aabfd329 AP |
230 | str @C[8],[sp,#$T[0][4]] @ T[0][4] = A[0][4] ^ E[1]; /* D[4] */ |
231 | str @C[9],[sp,#$T[0][4]+4] | |
232 | ||
53718107 | 233 | ldr @C[8],[sp,#$A[4][4]] |
aabfd329 | 234 | eor @C[4],@C[4],@C[6] |
56676f87 | 235 | ldr @C[9],[sp,#$A[4][4]+4] |
aabfd329 AP |
236 | eor @C[5],@C[5],@C[7] |
237 | ror @C[7],@C[4],#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */ | |
53718107 | 238 | ldr @C[4],[sp,#$A[0][0]] |
aabfd329 | 239 | ror @C[6],@C[5],#32-11 |
53718107 | 240 | ldr @C[5],[sp,#$A[0][0]+4] |
aabfd329 AP |
241 | eor @C[8],@C[8],@E[2] |
242 | eor @C[9],@C[9],@E[3] | |
243 | ror @C[8],@C[8],#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */ | |
53718107 | 244 | ldr @E[2],[sp,#$A[2][2]] |
aabfd329 | 245 | ror @C[9],@C[9],#32-7 |
53718107 | 246 | ldr @E[3],[sp,#$A[2][2]+4] |
aabfd329 AP |
247 | eor @C[0],@C[0],@C[4] |
248 | eor @C[1],@C[1],@C[5] @ C[0] = A[0][0] ^ C[0]; /* rotate by 0 */ /* D[0] */ | |
aabfd329 | 249 | eor @E[2],@E[2],@C[2] |
aabfd329 | 250 | ldr @C[2],[sp,#$A[1][1]] |
53718107 | 251 | eor @E[3],@E[3],@C[3] |
aabfd329 | 252 | ldr @C[3],[sp,#$A[1][1]+4] |
53718107 | 253 | ror @C[5],@E[2],#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); /* D[2] */ |
56676f87 | 254 | ldr @E[2],[sp,#324] @ load counter |
aabfd329 | 255 | eor @C[2],@C[2],@E[0] |
53718107 | 256 | ror @C[4],@E[3],#32-22 |
56676f87 | 257 | adr @E[3],iotas |
aabfd329 AP |
258 | eor @C[3],@C[3],@E[1] |
259 | ror @C[2],@C[2],#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); /* D[1] */ | |
56676f87 | 260 | add @E[3],@E[3],@E[2] |
aabfd329 AP |
261 | ror @C[3],@C[3],#32-22 |
262 | ||
56676f87 AP |
263 | ldr @E[0],[@E[3],#0] @ iotas[i].lo |
264 | add @E[2],@E[2],#8 | |
265 | ldr @E[1],[@E[3],#4] @ iotas[i].hi | |
266 | cmp @E[2],#192 | |
267 | str @E[2],[sp,#324] @ store counter | |
aabfd329 | 268 | |
56676f87 AP |
269 | bic @E[2],@C[4],@C[2] |
270 | bic @E[3],@C[5],@C[3] | |
271 | eor @E[2],@E[2],@C[0] | |
272 | eor @E[3],@E[3],@C[1] | |
aabfd329 AP |
273 | eor @E[0],@E[0],@E[2] |
274 | eor @E[1],@E[1],@E[3] | |
275 | str @E[0],[sp,#$A[0][0]] @ A[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i]; | |
276 | bic @E[2],@C[6],@C[4] | |
277 | str @E[1],[sp,#$A[0][0]+4] | |
278 | bic @E[3],@C[7],@C[5] | |
279 | eor @E[2],@E[2],@C[2] | |
280 | eor @E[3],@E[3],@C[3] | |
281 | str @E[2],[sp,#$A[0][1]] @ A[0][1] = C[1] ^ (~C[2] & C[3]); | |
282 | bic @E[0],@C[8],@C[6] | |
283 | str @E[3],[sp,#$A[0][1]+4] | |
284 | bic @E[1],@C[9],@C[7] | |
285 | eor @E[0],@E[0],@C[4] | |
286 | eor @E[1],@E[1],@C[5] | |
287 | str @E[0],[sp,#$A[0][2]] @ A[0][2] = C[2] ^ (~C[3] & C[4]); | |
288 | bic @E[2],@C[0],@C[8] | |
289 | str @E[1],[sp,#$A[0][2]+4] | |
290 | bic @E[3],@C[1],@C[9] | |
291 | eor @E[2],@E[2],@C[6] | |
292 | eor @E[3],@E[3],@C[7] | |
293 | str @E[2],[sp,#$A[0][3]] @ A[0][3] = C[3] ^ (~C[4] & C[0]); | |
294 | bic @E[0],@C[2],@C[0] | |
295 | str @E[3],[sp,#$A[0][3]+4] | |
53718107 | 296 | add @E[3],sp,#$D[0] |
aabfd329 AP |
297 | bic @E[1],@C[3],@C[1] |
298 | eor @E[0],@E[0],@C[8] | |
299 | eor @E[1],@E[1],@C[9] | |
300 | str @E[0],[sp,#$A[0][4]] @ A[0][4] = C[4] ^ (~C[0] & C[1]); | |
301 | str @E[1],[sp,#$A[0][4]+4] | |
302 | ||
aabfd329 | 303 | ldmia @E[3],{@C[6]-@C[9],@E[0],@E[1],@E[2],@E[3]} @ D[0..3] |
aabfd329 AP |
304 | ldr @C[0],[sp,#$A[1][0]] |
305 | ldr @C[1],[sp,#$A[1][0]+4] | |
53718107 AP |
306 | ldr @C[2],[sp,#$A[2][1]] |
307 | ldr @C[3],[sp,#$A[2][1]+4] | |
56676f87 | 308 | ldr @C[4],[sp,#$D[4]] |
aabfd329 | 309 | eor @C[0],@C[0],@C[6] |
56676f87 | 310 | ldr @C[5],[sp,#$D[4]+4] |
aabfd329 AP |
311 | eor @C[1],@C[1],@C[7] |
312 | str @C[0],[sp,#$T[1][0]] @ T[1][0] = A[1][0] ^ (C[3] = D[0]); | |
53718107 | 313 | add @C[0],sp,#$A[1][2] |
aabfd329 | 314 | str @C[1],[sp,#$T[1][0]+4] |
aabfd329 AP |
315 | eor @C[2],@C[2],@C[8] |
316 | eor @C[3],@C[3],@C[9] | |
317 | str @C[2],[sp,#$T[1][1]] @ T[1][1] = A[2][1] ^ (C[4] = D[1]); /* borrow T[1][1] */ | |
318 | str @C[3],[sp,#$T[1][1]+4] | |
aabfd329 AP |
319 | ldmia @C[0],{@C[0]-@C[3]} @ A[1][2..3] |
320 | eor @C[0],@C[0],@E[0] | |
321 | eor @C[1],@C[1],@E[1] | |
322 | str @C[0],[sp,#$T[1][2]] @ T[1][2] = A[1][2] ^ (E[0] = D[2]); | |
53718107 | 323 | ldr @C[0],[sp,#$A[2][4]] |
aabfd329 | 324 | str @C[1],[sp,#$T[1][2]+4] |
53718107 | 325 | ldr @C[1],[sp,#$A[2][4]+4] |
aabfd329 AP |
326 | eor @C[2],@C[2],@E[2] |
327 | eor @C[3],@C[3],@E[3] | |
328 | str @C[2],[sp,#$T[1][3]] @ T[1][3] = A[1][3] ^ (E[1] = D[3]); | |
53718107 | 329 | ldr @C[2],[sp,#$T[0][3]] |
aabfd329 | 330 | str @C[3],[sp,#$T[1][3]+4] |
53718107 | 331 | ldr @C[3],[sp,#$T[0][3]+4] |
aabfd329 | 332 | eor @C[0],@C[0],@C[4] |
53718107 | 333 | ldr @E[2],[sp,#$A[1][4]] |
aabfd329 | 334 | eor @C[1],@C[1],@C[5] |
53718107 | 335 | ldr @E[3],[sp,#$A[1][4]+4] |
aabfd329 | 336 | str @C[0],[sp,#$T[1][4]] @ T[1][4] = A[2][4] ^ (C[2] = D[4]); /* borrow T[1][4] */ |
aabfd329 | 337 | |
aabfd329 | 338 | ror @C[0],@C[2],#32-14 @ C[0] = ROL64(T[0][3], rhotates[0][3]); |
53718107 | 339 | str @C[1],[sp,#$T[1][4]+4] |
aabfd329 | 340 | ror @C[1],@C[3],#32-14 |
aabfd329 | 341 | eor @C[2],@E[2],@C[4] |
53718107 | 342 | ldr @C[4],[sp,#$A[2][0]] |
aabfd329 | 343 | eor @C[3],@E[3],@C[5] |
53718107 | 344 | ldr @C[5],[sp,#$A[2][0]+4] |
aabfd329 | 345 | ror @C[2],@C[2],#32-10 @ C[1] = ROL64(A[1][4] ^ C[2], rhotates[1][4]); /* D[4] */ |
53718107 | 346 | ldr @E[2],[sp,#$A[3][1]] |
aabfd329 | 347 | ror @C[3],@C[3],#32-10 |
53718107 | 348 | ldr @E[3],[sp,#$A[3][1]+4] |
aabfd329 AP |
349 | eor @C[6],@C[6],@C[4] |
350 | eor @C[7],@C[7],@C[5] | |
351 | ror @C[5],@C[6],#32-1 @ C[2] = ROL64(A[2][0] ^ C[3], rhotates[2][0]); /* D[0] */ | |
aabfd329 | 352 | eor @E[2],@E[2],@C[8] |
53718107 | 353 | ror @C[4],@C[7],#32-2 |
aabfd329 | 354 | ldr @C[8],[sp,#$A[4][2]] |
53718107 | 355 | eor @E[3],@E[3],@C[9] |
aabfd329 | 356 | ldr @C[9],[sp,#$A[4][2]+4] |
53718107 | 357 | ror @C[7],@E[2],#32-22 @ C[3] = ROL64(A[3][1] ^ C[4], rhotates[3][1]); /* D[1] */ |
aabfd329 | 358 | eor @E[0],@E[0],@C[8] |
53718107 | 359 | ror @C[6],@E[3],#32-23 |
aabfd329 AP |
360 | eor @E[1],@E[1],@C[9] |
361 | ror @C[9],@E[0],#32-30 @ C[4] = ROL64(A[4][2] ^ E[0], rhotates[4][2]); /* D[2] */ | |
aabfd329 AP |
362 | |
363 | bic @E[0],@C[4],@C[2] | |
53718107 | 364 | ror @C[8],@E[1],#32-31 |
aabfd329 AP |
365 | bic @E[1],@C[5],@C[3] |
366 | eor @E[0],@E[0],@C[0] | |
367 | eor @E[1],@E[1],@C[1] | |
368 | str @E[0],[sp,#$A[1][0]] @ A[1][0] = C[0] ^ (~C[1] & C[2]) | |
369 | bic @E[2],@C[6],@C[4] | |
370 | str @E[1],[sp,#$A[1][0]+4] | |
371 | bic @E[3],@C[7],@C[5] | |
372 | eor @E[2],@E[2],@C[2] | |
373 | eor @E[3],@E[3],@C[3] | |
374 | str @E[2],[sp,#$A[1][1]] @ A[1][1] = C[1] ^ (~C[2] & C[3]); | |
375 | bic @E[0],@C[8],@C[6] | |
376 | str @E[3],[sp,#$A[1][1]+4] | |
377 | bic @E[1],@C[9],@C[7] | |
378 | eor @E[0],@E[0],@C[4] | |
379 | eor @E[1],@E[1],@C[5] | |
380 | str @E[0],[sp,#$A[1][2]] @ A[1][2] = C[2] ^ (~C[3] & C[4]); | |
381 | bic @E[2],@C[0],@C[8] | |
382 | str @E[1],[sp,#$A[1][2]+4] | |
383 | bic @E[3],@C[1],@C[9] | |
384 | eor @E[2],@E[2],@C[6] | |
385 | eor @E[3],@E[3],@C[7] | |
386 | str @E[2],[sp,#$A[1][3]] @ A[1][3] = C[3] ^ (~C[4] & C[0]); | |
387 | bic @E[0],@C[2],@C[0] | |
388 | str @E[3],[sp,#$A[1][3]+4] | |
53718107 | 389 | add @E[3],sp,#$D[3] |
aabfd329 | 390 | bic @E[1],@C[3],@C[1] |
53718107 | 391 | ldr @C[1],[sp,#$T[0][1]] |
aabfd329 | 392 | eor @E[0],@E[0],@C[8] |
53718107 | 393 | ldr @C[0],[sp,#$T[0][1]+4] |
aabfd329 AP |
394 | eor @E[1],@E[1],@C[9] |
395 | str @E[0],[sp,#$A[1][4]] @ A[1][4] = C[4] ^ (~C[0] & C[1]); | |
396 | str @E[1],[sp,#$A[1][4]+4] | |
397 | ||
aabfd329 AP |
398 | ldr @C[2],[sp,#$T[1][2]] |
399 | ldr @C[3],[sp,#$T[1][2]+4] | |
aabfd329 AP |
400 | ldmia @E[3],{@E[0]-@E[2],@E[3]} @ D[3..4] |
401 | ldr @C[4],[sp,#$A[2][3]] | |
53718107 | 402 | ror @C[0],@C[0],#32-1 @ C[0] = ROL64(T[0][1], rhotates[0][1]); |
aabfd329 | 403 | ldr @C[5],[sp,#$A[2][3]+4] |
53718107 AP |
404 | ror @C[2],@C[2],#32-3 @ C[1] = ROL64(T[1][2], rhotates[1][2]); |
405 | ldr @C[6],[sp,#$A[3][4]] | |
406 | ror @C[3],@C[3],#32-3 | |
407 | ldr @C[7],[sp,#$A[3][4]+4] | |
aabfd329 | 408 | eor @E[0],@E[0],@C[4] |
53718107 | 409 | ldr @C[8],[sp,#$A[4][0]] |
aabfd329 | 410 | eor @E[1],@E[1],@C[5] |
53718107 | 411 | ldr @C[9],[sp,#$A[4][0]+4] |
aabfd329 | 412 | ror @C[5],@E[0],#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]); |
53718107 | 413 | ldr @E[0],[sp,#$D[0]] |
aabfd329 | 414 | ror @C[4],@E[1],#32-13 |
53718107 | 415 | ldr @E[1],[sp,#$D[0]+4] |
aabfd329 AP |
416 | eor @C[6],@C[6],@E[2] |
417 | eor @C[7],@C[7],@E[3] | |
418 | ror @C[6],@C[6],#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]); | |
aabfd329 | 419 | eor @C[8],@C[8],@E[0] |
53718107 | 420 | ror @C[7],@C[7],#32-4 |
aabfd329 AP |
421 | eor @C[9],@C[9],@E[1] |
422 | ror @C[8],@C[8],#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]); | |
aabfd329 AP |
423 | |
424 | bic @E[0],@C[4],@C[2] | |
53718107 | 425 | ror @C[9],@C[9],#32-9 |
aabfd329 AP |
426 | bic @E[1],@C[5],@C[3] |
427 | eor @E[0],@E[0],@C[0] | |
428 | eor @E[1],@E[1],@C[1] | |
429 | str @E[0],[sp,#$A[2][0]] @ A[2][0] = C[0] ^ (~C[1] & C[2]) | |
430 | bic @E[2],@C[6],@C[4] | |
431 | str @E[1],[sp,#$A[2][0]+4] | |
432 | bic @E[3],@C[7],@C[5] | |
433 | eor @E[2],@E[2],@C[2] | |
434 | eor @E[3],@E[3],@C[3] | |
435 | str @E[2],[sp,#$A[2][1]] @ A[2][1] = C[1] ^ (~C[2] & C[3]); | |
436 | bic @E[0],@C[8],@C[6] | |
437 | str @E[3],[sp,#$A[2][1]+4] | |
438 | bic @E[1],@C[9],@C[7] | |
439 | eor @E[0],@E[0],@C[4] | |
440 | eor @E[1],@E[1],@C[5] | |
441 | str @E[0],[sp,#$A[2][2]] @ A[2][2] = C[2] ^ (~C[3] & C[4]); | |
442 | bic @E[2],@C[0],@C[8] | |
443 | str @E[1],[sp,#$A[2][2]+4] | |
444 | bic @E[3],@C[1],@C[9] | |
445 | eor @E[2],@E[2],@C[6] | |
446 | eor @E[3],@E[3],@C[7] | |
447 | str @E[2],[sp,#$A[2][3]] @ A[2][3] = C[3] ^ (~C[4] & C[0]); | |
448 | bic @E[0],@C[2],@C[0] | |
449 | str @E[3],[sp,#$A[2][3]+4] | |
450 | bic @E[1],@C[3],@C[1] | |
451 | eor @E[0],@E[0],@C[8] | |
452 | eor @E[1],@E[1],@C[9] | |
453 | str @E[0],[sp,#$A[2][4]] @ A[2][4] = C[4] ^ (~C[0] & C[1]); | |
53718107 | 454 | add @C[2],sp,#$T[1][0] |
aabfd329 AP |
455 | str @E[1],[sp,#$A[2][4]+4] |
456 | ||
53718107 | 457 | add @E[3],sp,#$D[2] |
aabfd329 AP |
458 | ldr @C[1],[sp,#$T[0][4]] |
459 | ldr @C[0],[sp,#$T[0][4]+4] | |
53718107 AP |
460 | ldmia @C[2],{@C[2]-@C[5]} @ T[1][0..1] |
461 | ldmia @E[3],{@E[0]-@E[2],@E[3]} @ D[2..3] | |
aabfd329 | 462 | ror @C[1],@C[1],#32-13 @ C[0] = ROL64(T[0][4], rhotates[0][4]); |
53718107 | 463 | ldr @C[6],[sp,#$A[3][2]] |
aabfd329 | 464 | ror @C[0],@C[0],#32-14 |
53718107 | 465 | ldr @C[7],[sp,#$A[3][2]+4] |
aabfd329 | 466 | ror @C[2],@C[2],#32-18 @ C[1] = ROL64(T[1][0], rhotates[1][0]); |
53718107 | 467 | ldr @C[8],[sp,#$A[4][3]] |
aabfd329 | 468 | ror @C[3],@C[3],#32-18 |
53718107 | 469 | ldr @C[9],[sp,#$A[4][3]+4] |
aabfd329 | 470 | ror @C[4],@C[4],#32-5 @ C[2] = ROL64(T[1][1], rhotates[2][1]); /* originally A[2][1] */ |
aabfd329 | 471 | eor @E[0],@E[0],@C[6] |
53718107 | 472 | ror @C[5],@C[5],#32-5 |
aabfd329 AP |
473 | eor @E[1],@E[1],@C[7] |
474 | ror @C[7],@E[0],#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]); | |
aabfd329 | 475 | eor @C[8],@C[8],@E[2] |
53718107 | 476 | ror @C[6],@E[1],#32-8 |
aabfd329 AP |
477 | eor @C[9],@C[9],@E[3] |
478 | ror @C[8],@C[8],#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]); | |
aabfd329 AP |
479 | |
480 | bic @E[0],@C[4],@C[2] | |
53718107 | 481 | ror @C[9],@C[9],#32-28 |
aabfd329 AP |
482 | bic @E[1],@C[5],@C[3] |
483 | eor @E[0],@E[0],@C[0] | |
484 | eor @E[1],@E[1],@C[1] | |
485 | str @E[0],[sp,#$A[3][0]] @ A[3][0] = C[0] ^ (~C[1] & C[2]) | |
486 | bic @E[2],@C[6],@C[4] | |
487 | str @E[1],[sp,#$A[3][0]+4] | |
488 | bic @E[3],@C[7],@C[5] | |
489 | eor @E[2],@E[2],@C[2] | |
490 | eor @E[3],@E[3],@C[3] | |
491 | str @E[2],[sp,#$A[3][1]] @ A[3][1] = C[1] ^ (~C[2] & C[3]); | |
492 | bic @E[0],@C[8],@C[6] | |
493 | str @E[3],[sp,#$A[3][1]+4] | |
494 | bic @E[1],@C[9],@C[7] | |
495 | eor @E[0],@E[0],@C[4] | |
496 | eor @E[1],@E[1],@C[5] | |
497 | str @E[0],[sp,#$A[3][2]] @ A[3][2] = C[2] ^ (~C[3] & C[4]); | |
498 | bic @E[2],@C[0],@C[8] | |
499 | str @E[1],[sp,#$A[3][2]+4] | |
500 | bic @E[3],@C[1],@C[9] | |
501 | eor @E[2],@E[2],@C[6] | |
502 | eor @E[3],@E[3],@C[7] | |
503 | str @E[2],[sp,#$A[3][3]] @ A[3][3] = C[3] ^ (~C[4] & C[0]); | |
504 | bic @E[0],@C[2],@C[0] | |
505 | str @E[3],[sp,#$A[3][3]+4] | |
506 | bic @E[1],@C[3],@C[1] | |
507 | eor @E[0],@E[0],@C[8] | |
508 | eor @E[1],@E[1],@C[9] | |
509 | str @E[0],[sp,#$A[3][4]] @ A[3][4] = C[4] ^ (~C[0] & C[1]); | |
53718107 | 510 | add @E[3],sp,#$T[1][3] |
aabfd329 AP |
511 | str @E[1],[sp,#$A[3][4]+4] |
512 | ||
513 | ldr @C[0],[sp,#$T[0][2]] | |
514 | ldr @C[1],[sp,#$T[0][2]+4] | |
aabfd329 | 515 | ldmia @E[3],{@E[0]-@E[2],@E[3]} @ T[1][3..4] |
aabfd329 | 516 | ldr @C[7],[sp,#$T[0][0]] |
53718107 | 517 | ror @C[0],@C[0],#32-31 @ C[0] = ROL64(T[0][2], rhotates[0][2]); |
aabfd329 | 518 | ldr @C[6],[sp,#$T[0][0]+4] |
53718107 | 519 | ror @C[1],@C[1],#32-31 |
aabfd329 | 520 | ldr @C[8],[sp,#$A[4][1]] |
53718107 | 521 | ror @C[3],@E[0],#32-27 @ C[1] = ROL64(T[1][3], rhotates[1][3]); |
aabfd329 | 522 | ldr @E[0],[sp,#$D[1]] |
53718107 AP |
523 | ror @C[2],@E[1],#32-28 |
524 | ldr @C[9],[sp,#$A[4][1]+4] | |
525 | ror @C[5],@E[2],#32-19 @ C[2] = ROL64(T[1][4], rhotates[2][4]); /* originally A[2][4] */ | |
aabfd329 | 526 | ldr @E[1],[sp,#$D[1]+4] |
53718107 | 527 | ror @C[4],@E[3],#32-20 |
aabfd329 | 528 | eor @C[8],@C[8],@E[0] |
53718107 | 529 | ror @C[7],@C[7],#32-20 @ C[3] = ROL64(T[0][0], rhotates[3][0]); /* originally A[3][0] */ |
aabfd329 | 530 | eor @C[9],@C[9],@E[1] |
53718107 | 531 | ror @C[6],@C[6],#32-21 |
aabfd329 AP |
532 | |
533 | bic @E[0],@C[4],@C[2] | |
53718107 | 534 | ror @C[8],@C[8],#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]); |
aabfd329 | 535 | bic @E[1],@C[5],@C[3] |
53718107 | 536 | ror @C[9],@C[9],#32-1 |
aabfd329 AP |
537 | eor @E[0],@E[0],@C[0] |
538 | eor @E[1],@E[1],@C[1] | |
539 | str @E[0],[sp,#$A[4][0]] @ A[4][0] = C[0] ^ (~C[1] & C[2]) | |
540 | bic @E[2],@C[6],@C[4] | |
541 | str @E[1],[sp,#$A[4][0]+4] | |
542 | bic @E[3],@C[7],@C[5] | |
543 | eor @E[2],@E[2],@C[2] | |
544 | eor @E[3],@E[3],@C[3] | |
545 | str @E[2],[sp,#$A[4][1]] @ A[4][1] = C[1] ^ (~C[2] & C[3]); | |
546 | bic @E[0],@C[8],@C[6] | |
547 | str @E[3],[sp,#$A[4][1]+4] | |
548 | bic @E[1],@C[9],@C[7] | |
549 | eor @E[0],@E[0],@C[4] | |
550 | eor @E[1],@E[1],@C[5] | |
551 | str @E[0],[sp,#$A[4][2]] @ A[4][2] = C[2] ^ (~C[3] & C[4]); | |
552 | bic @E[2],@C[0],@C[8] | |
553 | str @E[1],[sp,#$A[4][2]+4] | |
554 | bic @E[3],@C[1],@C[9] | |
555 | eor @E[2],@E[2],@C[6] | |
556 | eor @E[3],@E[3],@C[7] | |
557 | str @E[2],[sp,#$A[4][3]] @ A[4][3] = C[3] ^ (~C[4] & C[0]); | |
aabfd329 | 558 | bic @E[0],@C[2],@C[0] |
53718107 | 559 | str @E[3],[sp,#$A[4][3]+4] |
aabfd329 AP |
560 | bic @E[1],@C[3],@C[1] |
561 | eor @E[2],@E[0],@C[8] | |
aabfd329 | 562 | eor @E[3],@E[1],@C[9] |
aabfd329 | 563 | str @E[2],[sp,#$A[4][4]] @ A[4][4] = C[4] ^ (~C[0] & C[1]); |
56676f87 | 564 | add @E[0],sp,#$A[1][0] |
aabfd329 AP |
565 | str @E[3],[sp,#$A[4][4]+4] |
566 | ||
567 | blo .Lround | |
568 | ||
56676f87 AP |
569 | ldr pc,[sp,#320] |
570 | .size KeccakF1600_int,.-KeccakF1600_int | |
571 | ||
572 | .type KeccakF1600, %function | |
573 | .align 5 | |
574 | KeccakF1600: | |
575 | stmdb sp!,{r0,r4-r11,lr} | |
576 | sub sp,sp,#320+16 @ space for A[5][5],D[5],T[2][5],... | |
577 | ||
578 | add @E[0],r0,#$A[1][0] | |
579 | add @E[1],sp,#$A[1][0] | |
580 | mov @E[2],r0 | |
581 | ldmia @E[0]!,{@C[0]-@C[9]} @ copy A[5][5] to stack | |
582 | stmia @E[1]!,{@C[0]-@C[9]} | |
583 | ldmia @E[0]!,{@C[0]-@C[9]} | |
584 | stmia @E[1]!,{@C[0]-@C[9]} | |
585 | ldmia @E[0]!,{@C[0]-@C[9]} | |
586 | stmia @E[1]!,{@C[0]-@C[9]} | |
587 | ldmia @E[0], {@C[0]-@C[9]} | |
588 | stmia @E[1], {@C[0]-@C[9]} | |
589 | ldmia @E[2], {@C[0]-@C[9]} @ A[0][0..4] | |
590 | add @E[0],sp,#$A[1][0] | |
591 | stmia sp, {@C[0]-@C[9]} | |
592 | ||
593 | bl KeccakF1600_enter | |
594 | ||
595 | ldr @E[1], [sp,#320+16] @ restore pointer to A | |
596 | ldmia sp, {@C[0]-@C[9]} | |
597 | stmia @E[1]!,{@C[0]-@C[9]} @ return A[5][5] | |
aabfd329 AP |
598 | ldmia @E[0]!,{@C[0]-@C[9]} |
599 | stmia @E[1]!,{@C[0]-@C[9]} | |
600 | ldmia @E[0]!,{@C[0]-@C[9]} | |
601 | stmia @E[1]!,{@C[0]-@C[9]} | |
602 | ldmia @E[0]!,{@C[0]-@C[9]} | |
603 | stmia @E[1]!,{@C[0]-@C[9]} | |
56676f87 AP |
604 | ldmia @E[0], {@C[0]-@C[9]} |
605 | stmia @E[1], {@C[0]-@C[9]} | |
aabfd329 | 606 | |
56676f87 AP |
607 | add sp,sp,#320+20 |
608 | ldmia sp!,{r4-r11,pc} | |
aabfd329 AP |
609 | .size KeccakF1600,.-KeccakF1600 |
610 | ___ | |
56676f87 AP |
# SHA3_absorb: XORs whole input blocks into the state and permutes after
# each one.
#   The prologue pushes r0-r12,lr and reserves 320+16 bytes, so the four
#   incoming argument registers land at sp+336 (pointer to A), sp+340 (inp),
#   sp+344 (len) and sp+348 (block size), as drawn in the layout below.
#   The state is copied to the stack; while len >= bsz one block is
#   absorbed and KeccakF1600_int is called, which clobbers the working
#   registers, hence the reloads from sp+340/344/348 after the call.
#   The value returned in r0 is the length left unprocessed (< bsz).
# Register aliases: $hi/$lo collect the odd/even bits of a lane, $i counts
# the bytes of a lane, $A_flat walks the state on the stack.
# $hi and $lo are deliberately not cleared per lane: each receives 4 bits per
# byte over 8 bytes, i.e. 32 shifts, which pushes out all earlier content.
611 | { my ($hi,$lo,$i,$A_flat, $len,$bsz,$inp) = map("r$_",(5..8, 10..12)); |
612 | ||
613 | ######################################################################## | |
614 | # Stack layout | |
615 | # ----->+-----------------------+ | |
616 | # | uint64_t A[5][5] | | |
617 | # | ... | | |
618 | # | ... | | |
619 | # +336->+-----------------------+ | |
620 | # | uint64_t *A | | |
621 | # +340->+-----------------------+ | |
622 | # | const void *inp | | |
623 | # +344->+-----------------------+ | |
624 | # | size_t len | | |
625 | # +348->+-----------------------+ | |
626 | # | size_t bs | | |
627 | # +352->+-----------------------+ | |
628 | # | .... | |
629 | ||
630 | $code.=<<___; | |
631 | .global SHA3_absorb | |
632 | .type SHA3_absorb,%function | |
633 | .align 5 | |
634 | SHA3_absorb: | |
635 | stmdb sp!,{r0-r12,lr} | |
636 | sub sp,sp,#320+16 | |
637 | ||
638 | mov r12,r0 | |
639 | add r14,sp,#0 | |
640 | mov $len,r2 | |
641 | mov $bsz,r3 | |
642 | ||
643 | ldmia r12!,{@C[0]-@C[9]} @ copy A[5][5] to stack | |
644 | stmia r14!,{@C[0]-@C[9]} | |
645 | ldmia r12!,{@C[0]-@C[9]} | |
646 | stmia r14!,{@C[0]-@C[9]} | |
647 | ldmia r12!,{@C[0]-@C[9]} | |
648 | stmia r14!,{@C[0]-@C[9]} | |
649 | ldmia r12!,{@C[0]-@C[9]} | |
650 | stmia r14!,{@C[0]-@C[9]} | |
651 | ldmia r12, {@C[0]-@C[9]} | |
652 | stmia r14, {@C[0]-@C[9]} | |
653 | ||
654 | ldr $inp,[sp,#340] | |
655 | ||
656 | .Loop_absorb: | |
657 | subs r0,$len,$bsz | |
658 | blo .Labsorbed | |
659 | add $A_flat,sp,#0 | |
660 | str r0,[sp,#344] @ save len - bsz | |
661 | ||
662 | .Loop_block: | |
663 | ldmia $A_flat,{r2-r3} @ A_flat[i] | |
664 | ldrb r0,[$inp,#7]! @ inp[7] | |
665 | mov $i,#8 | |
666 | ||
667 | .Lane_loop: | |
668 | subs $i,$i,#1 | |
669 | lsl r1,r0,#24 | |
670 | blo .Lane_done | |
671 | #ifdef __thumb2__ | |
672 | it ne | |
673 | ldrbne r0,[$inp,#-1]! | |
674 | #else | |
675 | ldrneb r0,[$inp,#-1]! | |
676 | #endif | |
677 | adds r1,r1,r1 @ sip through carry flag | |
678 | adc $hi,$hi,$hi | |
679 | adds r1,r1,r1 | |
680 | adc $lo,$lo,$lo | |
681 | adds r1,r1,r1 | |
682 | adc $hi,$hi,$hi | |
683 | adds r1,r1,r1 | |
684 | adc $lo,$lo,$lo | |
685 | adds r1,r1,r1 | |
686 | adc $hi,$hi,$hi | |
687 | adds r1,r1,r1 | |
688 | adc $lo,$lo,$lo | |
689 | adds r1,r1,r1 | |
690 | adc $hi,$hi,$hi | |
691 | adds r1,r1,r1 | |
692 | adc $lo,$lo,$lo | |
693 | b .Lane_loop | |
694 | ||
695 | .Lane_done: | |
696 | eor r2,r2,$lo | |
697 | eor r3,r3,$hi | |
698 | add $inp,$inp,#8 | |
699 | stmia $A_flat!,{r2-r3} @ A_flat[i++] ^= BitInterleave(inp[0..7]) | |
700 | subs $bsz,$bsz,#8 | |
701 | bhi .Loop_block | |
702 | ||
703 | str $inp,[sp,#340] | |
704 | ||
705 | bl KeccakF1600_int | |
706 | ||
707 | ldr $inp,[sp,#340] | |
708 | ldr $len,[sp,#344] | |
709 | ldr $bsz,[sp,#348] | |
710 | b .Loop_absorb | |
711 | ||
712 | .align 4 | |
713 | .Labsorbed: | |
714 | add r12,sp,#$A[1][0] | |
715 | ldr r14, [sp,#336] @ pull pointer to A[5][5] | |
716 | ldmia sp, {@C[0]-@C[9]} | |
717 | stmia r14!,{@C[0]-@C[9]} @ return A[5][5] | |
718 | ldmia r12!,{@C[0]-@C[9]} | |
719 | stmia r14!,{@C[0]-@C[9]} | |
720 | ldmia r12!,{@C[0]-@C[9]} | |
721 | stmia r14!,{@C[0]-@C[9]} | |
722 | ldmia r12!,{@C[0]-@C[9]} | |
723 | stmia r14!,{@C[0]-@C[9]} | |
724 | ldmia r12, {@C[0]-@C[9]} | |
725 | stmia r14, {@C[0]-@C[9]} | |
726 | ||
727 | add sp,sp,#320+32 | |
728 | mov r0,$len @ return value | |
729 | ldmia sp!,{r4-r12,pc} | |
730 | .size SHA3_absorb,.-SHA3_absorb | |
731 | ___ | |
732 | } | |
########################################################################
# SHA3_squeeze: writes output bytes from the bit-interleaved state and
# re-permutes the state whenever a full block has been emitted.
#
# Arguments arrive in r0-r3 and are moved to $A_flat (pointer to the state),
# $out (destination), $len (bytes to produce) and $bsz (block size in
# bytes).  r12 walks the state, r14 counts down the bytes left in the
# current block.
#
# Each lane is loaded as two words (r0, r1).  A byte is rebuilt by
# shifting the wanted nibble of each word to the top and alternately
# pulling one bit from r1 and one from r0 through the carry flag.  $shl
# steps 28,24,...,0, giving 8 bytes per lane, least significant first.
#
# The byte must be stored with strb.  A word store (str) would write four
# bytes per output byte: it overruns the caller's buffer by three bytes on
# the last byte, yields wrong data on big-endian targets, and issues
# unaligned word stores.  strb leaves the flags untouched, so the beq that
# follows still tests the result of "subs len,len,#1".
#
# NOTE: completion is tested only after a byte has been stored, so a call
# with len == 0 is not handled; callers are expected to pass len > 0.
{ my ($A_flat,$out,$len,$bsz, $byte,$shl) = map("r$_", (4..9));

$code.=<<___;
.global	SHA3_squeeze
.type	SHA3_squeeze,%function
.align	5
SHA3_squeeze:
	stmdb	sp!,{r4-r10,lr}
	mov	r12,r0
	mov	$A_flat,r0
	mov	$out,r1
	mov	$len,r2
	mov	$bsz,r3
	mov	r14,r3
	b	.Loop_squeeze

.align	4
.Loop_squeeze:
	ldmia	r12!,{r0,r1}		@ A_flat[i++]
	mov	$shl,#28

.Lane_squeeze:
	lsl	r2,r0,$shl
	lsl	r3,r1,$shl
	eor	$byte,$byte,$byte
	adds	r3,r3,r3		@ sip through carry flag
	adc	$byte,$byte,$byte
	adds	r2,r2,r2
	adc	$byte,$byte,$byte
	adds	r3,r3,r3
	adc	$byte,$byte,$byte
	adds	r2,r2,r2
	adc	$byte,$byte,$byte
	adds	r3,r3,r3
	adc	$byte,$byte,$byte
	adds	r2,r2,r2
	adc	$byte,$byte,$byte
	adds	r3,r3,r3
	adc	$byte,$byte,$byte
	adds	r2,r2,r2
	adc	$byte,$byte,$byte
	subs	$len,$len,#1		@ len -= 1
	strb	$byte,[$out],#1		@ one byte only, flags preserved
	beq	.Lsqueeze_done
	subs	$shl,$shl,#4
	bhs	.Lane_squeeze

	subs	r14,r14,#8		@ bsz -= 8
	bhi	.Loop_squeeze

	mov	r0,$A_flat

	bl	KeccakF1600

	mov	r12,$A_flat
	mov	r14,$bsz
	b	.Loop_squeeze

.Lsqueeze_done:
	ldmia	sp!,{r4-r10,pc}
.size	SHA3_squeeze,.-SHA3_squeeze
.asciz	"Keccak-1600 absorb and squeeze for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
.align	2
___
}
798 | ||
# Emit the generated assembly.  Both the write and the close are checked:
# output is buffered, so a full disk or a closed pipe may surface only at
# close, and an unchecked failure would hand the assembler a truncated
# file while this script still exits 0.
print $code or die "error writing to STDOUT: $!";

close STDOUT or die "error closing STDOUT: $!";	# enforce flush