]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/sha/asm/keccak1600-armv4.pl
sha/asm/keccak1600-armv4.pl: add SHA3_absorb and SHA3_squeeze.
[thirdparty/openssl.git] / crypto / sha / asm / keccak1600-armv4.pl
CommitLineData
56676f87
AP
1#!/usr/bin/env perl
2# Copyright 2017 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the OpenSSL license (the "License"). You may not use
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8#
9# ====================================================================
10# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
11# project. The module is, however, dual licensed under OpenSSL and
12# CRYPTOGAMS licenses depending on where you obtain it. For further
13# details see http://www.openssl.org/~appro/cryptogams/.
14# ====================================================================
15#
16# Keccak-1600 for ARMv4.
17#
18# June 2017.
19#
20# This is KECCAK_1X variant (see keccak1600.c) with bit interleaving.
21# How does it compare to Keccak Code Package? It's as fast, but several
22# times smaller, and is endian- and ISA-neutral. ISA neutrality means
23# that minimum ISA requirement is ARMv4, yet it can be assembled even
24# as ARMv7 Thumb-2.
25#
26########################################################################
27# Numbers are cycles per processed byte accounting even for input bit
28# interleaving.
29#
30# r=1600(*) r=1024
31#
32# Cortex-A7 71/+180% 103
33# Cortex-A8 48/+290% 69
34# Cortex-A15 34/+210% 49
35#
36# (*) Not used in real life, meaningful as estimate for single sponge
37# operation performance. Numbers after slash are improvement over
38# compiler-generated KECCAK_1X reference code.
39
aabfd329
AP
# Register names interpolated into the assembly text below.
# @C is r0-r9: ten 32-bit registers, used e.g. to hold one row of five
# 64-bit lanes (A[0][0..4]) as lo/hi word pairs.
# @E is r10, r11, r12 and r14; r13 is skipped (the code addresses the stack
# through "sp" directly), and r14 is lr, which is why KeccakF1600_int has to
# park the return address in the stack frame.
40my @C = map("r$_",(0..9));
41my @E = map("r$_",(10..12,14));
42
56676f87
AP
43########################################################################
44# Stack layout
45# ----->+-----------------------+
46# | uint64_t A[5][5] |
47# | ... |
48# +200->+-----------------------+
49# | uint64_t D[5] |
50# | ... |
51# +240->+-----------------------+
52# | uint64_t T[2][5] |
53# | ... |
54# +320->+-----------------------+
55# | saved lr |
56# +324->+-----------------------+
57# | loop counter |
58# +328->+-----------------------+
59# | ...
60
aabfd329
AP
# Byte offsets from sp of the 64-bit slots drawn in the stack layout above:
#   $A[i][j] = 8*(5*i+j)      state lanes,       0..199
#   $D[j]    = 8*(25+j)       theta D values,  200..239
#   $T[i][j] = 8*(30+5*i+j)   temporaries,     240..319
# The high 32-bit word of a slot is addressed as offset+4 in the assembly.
61my @A = map([ 8*$_, 8*($_+1), 8*($_+2), 8*($_+3), 8*($_+4) ], (0,5,10,15,20));
62my @D = map(8*$_, (25..29));
63my @T = map([ 8*$_, 8*($_+1), 8*($_+2), 8*($_+3), 8*($_+4) ], (30,35));
64
# The here-document that starts on the next line appends three assembly
# objects to $code:
#
#   iotas            24 pairs of 32-bit words, one pair per round. The round
#                    code loads them as iotas[i].lo / iotas[i].hi and XORs
#                    them into A[0][0].
#
#   KeccakF1600_int  Runs the permutation on the state that lives at sp
#                    (layout above). It loads A[0][0..4] into @C[0..9] and
#                    falls through to KeccakF1600_enter, which saves lr at
#                    sp+320 and zeroes the round counter at sp+324. Each
#                    round advances the counter by 8 (the byte size of one
#                    iotas entry) and the loop ends when it reaches 192,
#                    i.e. after 24 rounds. The function returns by loading
#                    pc from sp+320. The "cmp" next to the counter update is
#                    only consumed by the "blo .Lround" at the very end of
#                    the round, so nothing in between may modify the flags.
#                    Lanes are kept bit-interleaved (see the file header),
#                    which is why every 64-bit rotation shows up as a pair of
#                    32-bit "ror" instructions.
#
#   KeccakF1600      Wrapper taking the state pointer in r0. It pushes
#                    r0,r4-r11,lr, reserves 320+16 bytes, copies the 200-byte
#                    state onto the stack, calls KeccakF1600_enter and copies
#                    the result back through the pointer it reloads from
#                    sp+336.
65$code.=<<___;
66.text
67
56676f87
AP
68#if defined(__thumb2__)
69.syntax unified
70.thumb
71#else
72.code 32
73#endif
74
aabfd329
AP
75.type iotas,%object
76.align 5
77iotas:
78 .long 0x00000001, 0x00000000
79 .long 0x00000000, 0x00000089
80 .long 0x00000000, 0x8000008b
81 .long 0x00000000, 0x80008080
82 .long 0x00000001, 0x0000008b
83 .long 0x00000001, 0x00008000
84 .long 0x00000001, 0x80008088
85 .long 0x00000001, 0x80000082
86 .long 0x00000000, 0x0000000b
87 .long 0x00000000, 0x0000000a
88 .long 0x00000001, 0x00008082
89 .long 0x00000000, 0x00008003
90 .long 0x00000001, 0x0000808b
91 .long 0x00000001, 0x8000000b
92 .long 0x00000001, 0x8000008a
93 .long 0x00000001, 0x80000081
94 .long 0x00000000, 0x80000081
95 .long 0x00000000, 0x80000008
96 .long 0x00000000, 0x00000083
97 .long 0x00000000, 0x80008003
98 .long 0x00000001, 0x80008088
99 .long 0x00000000, 0x80000088
100 .long 0x00000001, 0x00008000
101 .long 0x00000000, 0x80008082
56676f87 102.size iotas,.-iotas
aabfd329 103
56676f87 104.type KeccakF1600_int, %function
aabfd329 105.align 5
56676f87
AP
106KeccakF1600_int:
107 ldmia sp,{@C[0]-@C[9]} @ A[0][0..4]
aabfd329 108 add @E[0],sp,#$A[1][0]
56676f87
AP
109KeccakF1600_enter:
110 str lr,[sp,#320]
111 eor @E[1],@E[1],@E[1]
112 str @E[1],[sp,#324]
113 b .Lround_enter
aabfd329
AP
114
115.align 4
116.Lround:
56676f87
AP
117 ldmia sp,{@C[0]-@C[9]} @ A[0][0..4]
118.Lround_enter:
aabfd329
AP
119 ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[1][0..1]
120 eor @C[0],@C[0],@E[0]
121 add @E[0],sp,#$A[1][2]
122 eor @C[1],@C[1],@E[1]
123 eor @C[2],@C[2],@E[2]
124 eor @C[3],@C[3],@E[3]
125 ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[1][2..3]
126 eor @C[4],@C[4],@E[0]
127 add @E[0],sp,#$A[1][4]
128 eor @C[5],@C[5],@E[1]
129 eor @C[6],@C[6],@E[2]
130 eor @C[7],@C[7],@E[3]
131 ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[1][4]..A[2][0]
132 eor @C[8],@C[8],@E[0]
133 add @E[0],sp,#$A[2][1]
134 eor @C[9],@C[9],@E[1]
135 eor @C[0],@C[0],@E[2]
136 eor @C[1],@C[1],@E[3]
137 ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[2][1..2]
138 eor @C[2],@C[2],@E[0]
139 add @E[0],sp,#$A[2][3]
140 eor @C[3],@C[3],@E[1]
141 eor @C[4],@C[4],@E[2]
142 eor @C[5],@C[5],@E[3]
143 ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[2][3..4]
144 eor @C[6],@C[6],@E[0]
145 add @E[0],sp,#$A[3][0]
146 eor @C[7],@C[7],@E[1]
147 eor @C[8],@C[8],@E[2]
148 eor @C[9],@C[9],@E[3]
149 ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[3][0..1]
150 eor @C[0],@C[0],@E[0]
151 add @E[0],sp,#$A[3][2]
152 eor @C[1],@C[1],@E[1]
153 eor @C[2],@C[2],@E[2]
154 eor @C[3],@C[3],@E[3]
155 ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[3][2..3]
156 eor @C[4],@C[4],@E[0]
157 add @E[0],sp,#$A[3][4]
158 eor @C[5],@C[5],@E[1]
159 eor @C[6],@C[6],@E[2]
160 eor @C[7],@C[7],@E[3]
161 ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[3][4]..A[4][0]
162 eor @C[8],@C[8],@E[0]
163 add @E[0],sp,#$A[4][1]
164 eor @C[9],@C[9],@E[1]
165 eor @C[0],@C[0],@E[2]
166 eor @C[1],@C[1],@E[3]
167 ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[4][1..2]
168 eor @C[2],@C[2],@E[0]
169 add @E[0],sp,#$A[4][3]
170 eor @C[3],@C[3],@E[1]
171 eor @C[4],@C[4],@E[2]
172 eor @C[5],@C[5],@E[3]
173 ldmia @E[0],{@E[0]-@E[2],@E[3]} @ A[4][3..4]
174 eor @C[6],@C[6],@E[0]
175 eor @C[7],@C[7],@E[1]
176 eor @C[8],@C[8],@E[2]
177 eor @C[9],@C[9],@E[3]
178
179 eor @E[0],@C[0],@C[5],ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0];
180 eor @E[1],@C[1],@C[4]
181 str @E[0],[sp,#$D[1]] @ D[1] = E[0]
182 eor @E[2],@C[6],@C[1],ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3];
183 str @E[1],[sp,#$D[1]+4]
184 eor @E[3],@C[7],@C[0]
185 str @E[2],[sp,#$D[4]] @ D[4] = E[1]
186 eor @C[0],@C[8],@C[3],ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4];
187 str @E[3],[sp,#$D[4]+4]
188 eor @C[1],@C[9],@C[2]
189 str @C[0],[sp,#$D[0]] @ D[0] = C[0]
190 eor @C[2],@C[2],@C[7],ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1];
191 str @C[1],[sp,#$D[0]+4]
192 eor @C[3],@C[3],@C[6]
193 str @C[2],[sp,#$D[2]] @ D[2] = C[1]
194 eor @C[4],@C[4],@C[9],ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2];
195 str @C[3],[sp,#$D[2]+4]
196 eor @C[5],@C[5],@C[8]
56676f87
AP
197 ldr @C[8],[sp,#$A[3][0]]
198 ldr @C[9],[sp,#$A[3][0]+4]
aabfd329
AP
199 str @C[4],[sp,#$D[3]] @ D[3] = C[2]
200 str @C[5],[sp,#$D[3]+4]
201
53718107 202 ldr @C[6],[sp,#$A[0][1]]
aabfd329 203 eor @C[8],@C[8],@C[0]
56676f87 204 ldr @C[7],[sp,#$A[0][1]+4]
aabfd329
AP
205 eor @C[9],@C[9],@C[1]
206 str @C[8],[sp,#$T[0][0]] @ T[0][0] = A[3][0] ^ C[0]; /* borrow T[0][0] */
53718107 207 ldr @C[8],[sp,#$A[0][2]]
aabfd329 208 str @C[9],[sp,#$T[0][0]+4]
53718107 209 ldr @C[9],[sp,#$A[0][2]+4]
aabfd329
AP
210 eor @C[6],@C[6],@E[0]
211 eor @C[7],@C[7],@E[1]
212 str @C[6],[sp,#$T[0][1]] @ T[0][1] = A[0][1] ^ E[0]; /* D[1] */
53718107 213 ldr @C[6],[sp,#$A[0][3]]
aabfd329 214 str @C[7],[sp,#$T[0][1]+4]
53718107 215 ldr @C[7],[sp,#$A[0][3]+4]
aabfd329
AP
216 eor @C[8],@C[8],@C[2]
217 eor @C[9],@C[9],@C[3]
218 str @C[8],[sp,#$T[0][2]] @ T[0][2] = A[0][2] ^ C[1]; /* D[2] */
53718107 219 ldr @C[8],[sp,#$A[0][4]]
aabfd329 220 str @C[9],[sp,#$T[0][2]+4]
53718107 221 ldr @C[9],[sp,#$A[0][4]+4]
aabfd329
AP
222 eor @C[6],@C[6],@C[4]
223 eor @C[7],@C[7],@C[5]
224 str @C[6],[sp,#$T[0][3]] @ T[0][3] = A[0][3] ^ C[2]; /* D[3] */
aabfd329 225 eor @C[8],@C[8],@E[2]
53718107 226 str @C[7],[sp,#$T[0][3]+4]
aabfd329 227 eor @C[9],@C[9],@E[3]
56676f87
AP
228 ldr @C[6],[sp,#$A[3][3]]
229 ldr @C[7],[sp,#$A[3][3]+4]
aabfd329
AP
230 str @C[8],[sp,#$T[0][4]] @ T[0][4] = A[0][4] ^ E[1]; /* D[4] */
231 str @C[9],[sp,#$T[0][4]+4]
232
53718107 233 ldr @C[8],[sp,#$A[4][4]]
aabfd329 234 eor @C[4],@C[4],@C[6]
56676f87 235 ldr @C[9],[sp,#$A[4][4]+4]
aabfd329
AP
236 eor @C[5],@C[5],@C[7]
237 ror @C[7],@C[4],#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */
53718107 238 ldr @C[4],[sp,#$A[0][0]]
aabfd329 239 ror @C[6],@C[5],#32-11
53718107 240 ldr @C[5],[sp,#$A[0][0]+4]
aabfd329
AP
241 eor @C[8],@C[8],@E[2]
242 eor @C[9],@C[9],@E[3]
243 ror @C[8],@C[8],#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */
53718107 244 ldr @E[2],[sp,#$A[2][2]]
aabfd329 245 ror @C[9],@C[9],#32-7
53718107 246 ldr @E[3],[sp,#$A[2][2]+4]
aabfd329
AP
247 eor @C[0],@C[0],@C[4]
248 eor @C[1],@C[1],@C[5] @ C[0] = A[0][0] ^ C[0]; /* rotate by 0 */ /* D[0] */
aabfd329 249 eor @E[2],@E[2],@C[2]
aabfd329 250 ldr @C[2],[sp,#$A[1][1]]
53718107 251 eor @E[3],@E[3],@C[3]
aabfd329 252 ldr @C[3],[sp,#$A[1][1]+4]
53718107 253 ror @C[5],@E[2],#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); /* D[2] */
56676f87 254 ldr @E[2],[sp,#324] @ load counter
aabfd329 255 eor @C[2],@C[2],@E[0]
53718107 256 ror @C[4],@E[3],#32-22
56676f87 257 adr @E[3],iotas
aabfd329
AP
258 eor @C[3],@C[3],@E[1]
259 ror @C[2],@C[2],#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); /* D[1] */
56676f87 260 add @E[3],@E[3],@E[2]
aabfd329
AP
261 ror @C[3],@C[3],#32-22
262
56676f87
AP
263 ldr @E[0],[@E[3],#0] @ iotas[i].lo
264 add @E[2],@E[2],#8
265 ldr @E[1],[@E[3],#4] @ iotas[i].hi
266 cmp @E[2],#192
267 str @E[2],[sp,#324] @ store counter
aabfd329 268
56676f87
AP
269 bic @E[2],@C[4],@C[2]
270 bic @E[3],@C[5],@C[3]
271 eor @E[2],@E[2],@C[0]
272 eor @E[3],@E[3],@C[1]
aabfd329
AP
273 eor @E[0],@E[0],@E[2]
274 eor @E[1],@E[1],@E[3]
275 str @E[0],[sp,#$A[0][0]] @ A[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
276 bic @E[2],@C[6],@C[4]
277 str @E[1],[sp,#$A[0][0]+4]
278 bic @E[3],@C[7],@C[5]
279 eor @E[2],@E[2],@C[2]
280 eor @E[3],@E[3],@C[3]
281 str @E[2],[sp,#$A[0][1]] @ A[0][1] = C[1] ^ (~C[2] & C[3]);
282 bic @E[0],@C[8],@C[6]
283 str @E[3],[sp,#$A[0][1]+4]
284 bic @E[1],@C[9],@C[7]
285 eor @E[0],@E[0],@C[4]
286 eor @E[1],@E[1],@C[5]
287 str @E[0],[sp,#$A[0][2]] @ A[0][2] = C[2] ^ (~C[3] & C[4]);
288 bic @E[2],@C[0],@C[8]
289 str @E[1],[sp,#$A[0][2]+4]
290 bic @E[3],@C[1],@C[9]
291 eor @E[2],@E[2],@C[6]
292 eor @E[3],@E[3],@C[7]
293 str @E[2],[sp,#$A[0][3]] @ A[0][3] = C[3] ^ (~C[4] & C[0]);
294 bic @E[0],@C[2],@C[0]
295 str @E[3],[sp,#$A[0][3]+4]
53718107 296 add @E[3],sp,#$D[0]
aabfd329
AP
297 bic @E[1],@C[3],@C[1]
298 eor @E[0],@E[0],@C[8]
299 eor @E[1],@E[1],@C[9]
300 str @E[0],[sp,#$A[0][4]] @ A[0][4] = C[4] ^ (~C[0] & C[1]);
301 str @E[1],[sp,#$A[0][4]+4]
302
aabfd329 303 ldmia @E[3],{@C[6]-@C[9],@E[0],@E[1],@E[2],@E[3]} @ D[0..3]
aabfd329
AP
304 ldr @C[0],[sp,#$A[1][0]]
305 ldr @C[1],[sp,#$A[1][0]+4]
53718107
AP
306 ldr @C[2],[sp,#$A[2][1]]
307 ldr @C[3],[sp,#$A[2][1]+4]
56676f87 308 ldr @C[4],[sp,#$D[4]]
aabfd329 309 eor @C[0],@C[0],@C[6]
56676f87 310 ldr @C[5],[sp,#$D[4]+4]
aabfd329
AP
311 eor @C[1],@C[1],@C[7]
312 str @C[0],[sp,#$T[1][0]] @ T[1][0] = A[1][0] ^ (C[3] = D[0]);
53718107 313 add @C[0],sp,#$A[1][2]
aabfd329 314 str @C[1],[sp,#$T[1][0]+4]
aabfd329
AP
315 eor @C[2],@C[2],@C[8]
316 eor @C[3],@C[3],@C[9]
317 str @C[2],[sp,#$T[1][1]] @ T[1][1] = A[2][1] ^ (C[4] = D[1]); /* borrow T[1][1] */
318 str @C[3],[sp,#$T[1][1]+4]
aabfd329
AP
319 ldmia @C[0],{@C[0]-@C[3]} @ A[1][2..3]
320 eor @C[0],@C[0],@E[0]
321 eor @C[1],@C[1],@E[1]
322 str @C[0],[sp,#$T[1][2]] @ T[1][2] = A[1][2] ^ (E[0] = D[2]);
53718107 323 ldr @C[0],[sp,#$A[2][4]]
aabfd329 324 str @C[1],[sp,#$T[1][2]+4]
53718107 325 ldr @C[1],[sp,#$A[2][4]+4]
aabfd329
AP
326 eor @C[2],@C[2],@E[2]
327 eor @C[3],@C[3],@E[3]
328 str @C[2],[sp,#$T[1][3]] @ T[1][3] = A[1][3] ^ (E[1] = D[3]);
53718107 329 ldr @C[2],[sp,#$T[0][3]]
aabfd329 330 str @C[3],[sp,#$T[1][3]+4]
53718107 331 ldr @C[3],[sp,#$T[0][3]+4]
aabfd329 332 eor @C[0],@C[0],@C[4]
53718107 333 ldr @E[2],[sp,#$A[1][4]]
aabfd329 334 eor @C[1],@C[1],@C[5]
53718107 335 ldr @E[3],[sp,#$A[1][4]+4]
aabfd329 336 str @C[0],[sp,#$T[1][4]] @ T[1][4] = A[2][4] ^ (C[2] = D[4]); /* borrow T[1][4] */
aabfd329 337
aabfd329 338 ror @C[0],@C[2],#32-14 @ C[0] = ROL64(T[0][3], rhotates[0][3]);
53718107 339 str @C[1],[sp,#$T[1][4]+4]
aabfd329 340 ror @C[1],@C[3],#32-14
aabfd329 341 eor @C[2],@E[2],@C[4]
53718107 342 ldr @C[4],[sp,#$A[2][0]]
aabfd329 343 eor @C[3],@E[3],@C[5]
53718107 344 ldr @C[5],[sp,#$A[2][0]+4]
aabfd329 345 ror @C[2],@C[2],#32-10 @ C[1] = ROL64(A[1][4] ^ C[2], rhotates[1][4]); /* D[4] */
53718107 346 ldr @E[2],[sp,#$A[3][1]]
aabfd329 347 ror @C[3],@C[3],#32-10
53718107 348 ldr @E[3],[sp,#$A[3][1]+4]
aabfd329
AP
349 eor @C[6],@C[6],@C[4]
350 eor @C[7],@C[7],@C[5]
351 ror @C[5],@C[6],#32-1 @ C[2] = ROL64(A[2][0] ^ C[3], rhotates[2][0]); /* D[0] */
aabfd329 352 eor @E[2],@E[2],@C[8]
53718107 353 ror @C[4],@C[7],#32-2
aabfd329 354 ldr @C[8],[sp,#$A[4][2]]
53718107 355 eor @E[3],@E[3],@C[9]
aabfd329 356 ldr @C[9],[sp,#$A[4][2]+4]
53718107 357 ror @C[7],@E[2],#32-22 @ C[3] = ROL64(A[3][1] ^ C[4], rhotates[3][1]); /* D[1] */
aabfd329 358 eor @E[0],@E[0],@C[8]
53718107 359 ror @C[6],@E[3],#32-23
aabfd329
AP
360 eor @E[1],@E[1],@C[9]
361 ror @C[9],@E[0],#32-30 @ C[4] = ROL64(A[4][2] ^ E[0], rhotates[4][2]); /* D[2] */
aabfd329
AP
362
363 bic @E[0],@C[4],@C[2]
53718107 364 ror @C[8],@E[1],#32-31
aabfd329
AP
365 bic @E[1],@C[5],@C[3]
366 eor @E[0],@E[0],@C[0]
367 eor @E[1],@E[1],@C[1]
368 str @E[0],[sp,#$A[1][0]] @ A[1][0] = C[0] ^ (~C[1] & C[2])
369 bic @E[2],@C[6],@C[4]
370 str @E[1],[sp,#$A[1][0]+4]
371 bic @E[3],@C[7],@C[5]
372 eor @E[2],@E[2],@C[2]
373 eor @E[3],@E[3],@C[3]
374 str @E[2],[sp,#$A[1][1]] @ A[1][1] = C[1] ^ (~C[2] & C[3]);
375 bic @E[0],@C[8],@C[6]
376 str @E[3],[sp,#$A[1][1]+4]
377 bic @E[1],@C[9],@C[7]
378 eor @E[0],@E[0],@C[4]
379 eor @E[1],@E[1],@C[5]
380 str @E[0],[sp,#$A[1][2]] @ A[1][2] = C[2] ^ (~C[3] & C[4]);
381 bic @E[2],@C[0],@C[8]
382 str @E[1],[sp,#$A[1][2]+4]
383 bic @E[3],@C[1],@C[9]
384 eor @E[2],@E[2],@C[6]
385 eor @E[3],@E[3],@C[7]
386 str @E[2],[sp,#$A[1][3]] @ A[1][3] = C[3] ^ (~C[4] & C[0]);
387 bic @E[0],@C[2],@C[0]
388 str @E[3],[sp,#$A[1][3]+4]
53718107 389 add @E[3],sp,#$D[3]
aabfd329 390 bic @E[1],@C[3],@C[1]
53718107 391 ldr @C[1],[sp,#$T[0][1]]
aabfd329 392 eor @E[0],@E[0],@C[8]
53718107 393 ldr @C[0],[sp,#$T[0][1]+4]
aabfd329
AP
394 eor @E[1],@E[1],@C[9]
395 str @E[0],[sp,#$A[1][4]] @ A[1][4] = C[4] ^ (~C[0] & C[1]);
396 str @E[1],[sp,#$A[1][4]+4]
397
aabfd329
AP
398 ldr @C[2],[sp,#$T[1][2]]
399 ldr @C[3],[sp,#$T[1][2]+4]
aabfd329
AP
400 ldmia @E[3],{@E[0]-@E[2],@E[3]} @ D[3..4]
401 ldr @C[4],[sp,#$A[2][3]]
53718107 402 ror @C[0],@C[0],#32-1 @ C[0] = ROL64(T[0][1], rhotates[0][1]);
aabfd329 403 ldr @C[5],[sp,#$A[2][3]+4]
53718107
AP
404 ror @C[2],@C[2],#32-3 @ C[1] = ROL64(T[1][2], rhotates[1][2]);
405 ldr @C[6],[sp,#$A[3][4]]
406 ror @C[3],@C[3],#32-3
407 ldr @C[7],[sp,#$A[3][4]+4]
aabfd329 408 eor @E[0],@E[0],@C[4]
53718107 409 ldr @C[8],[sp,#$A[4][0]]
aabfd329 410 eor @E[1],@E[1],@C[5]
53718107 411 ldr @C[9],[sp,#$A[4][0]+4]
aabfd329 412 ror @C[5],@E[0],#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
53718107 413 ldr @E[0],[sp,#$D[0]]
aabfd329 414 ror @C[4],@E[1],#32-13
53718107 415 ldr @E[1],[sp,#$D[0]+4]
aabfd329
AP
416 eor @C[6],@C[6],@E[2]
417 eor @C[7],@C[7],@E[3]
418 ror @C[6],@C[6],#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
aabfd329 419 eor @C[8],@C[8],@E[0]
53718107 420 ror @C[7],@C[7],#32-4
aabfd329
AP
421 eor @C[9],@C[9],@E[1]
422 ror @C[8],@C[8],#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
aabfd329
AP
423
424 bic @E[0],@C[4],@C[2]
53718107 425 ror @C[9],@C[9],#32-9
aabfd329
AP
426 bic @E[1],@C[5],@C[3]
427 eor @E[0],@E[0],@C[0]
428 eor @E[1],@E[1],@C[1]
429 str @E[0],[sp,#$A[2][0]] @ A[2][0] = C[0] ^ (~C[1] & C[2])
430 bic @E[2],@C[6],@C[4]
431 str @E[1],[sp,#$A[2][0]+4]
432 bic @E[3],@C[7],@C[5]
433 eor @E[2],@E[2],@C[2]
434 eor @E[3],@E[3],@C[3]
435 str @E[2],[sp,#$A[2][1]] @ A[2][1] = C[1] ^ (~C[2] & C[3]);
436 bic @E[0],@C[8],@C[6]
437 str @E[3],[sp,#$A[2][1]+4]
438 bic @E[1],@C[9],@C[7]
439 eor @E[0],@E[0],@C[4]
440 eor @E[1],@E[1],@C[5]
441 str @E[0],[sp,#$A[2][2]] @ A[2][2] = C[2] ^ (~C[3] & C[4]);
442 bic @E[2],@C[0],@C[8]
443 str @E[1],[sp,#$A[2][2]+4]
444 bic @E[3],@C[1],@C[9]
445 eor @E[2],@E[2],@C[6]
446 eor @E[3],@E[3],@C[7]
447 str @E[2],[sp,#$A[2][3]] @ A[2][3] = C[3] ^ (~C[4] & C[0]);
448 bic @E[0],@C[2],@C[0]
449 str @E[3],[sp,#$A[2][3]+4]
450 bic @E[1],@C[3],@C[1]
451 eor @E[0],@E[0],@C[8]
452 eor @E[1],@E[1],@C[9]
453 str @E[0],[sp,#$A[2][4]] @ A[2][4] = C[4] ^ (~C[0] & C[1]);
53718107 454 add @C[2],sp,#$T[1][0]
aabfd329
AP
455 str @E[1],[sp,#$A[2][4]+4]
456
53718107 457 add @E[3],sp,#$D[2]
aabfd329
AP
458 ldr @C[1],[sp,#$T[0][4]]
459 ldr @C[0],[sp,#$T[0][4]+4]
53718107
AP
460 ldmia @C[2],{@C[2]-@C[5]} @ T[1][0..1]
461 ldmia @E[3],{@E[0]-@E[2],@E[3]} @ D[2..3]
aabfd329 462 ror @C[1],@C[1],#32-13 @ C[0] = ROL64(T[0][4], rhotates[0][4]);
53718107 463 ldr @C[6],[sp,#$A[3][2]]
aabfd329 464 ror @C[0],@C[0],#32-14
53718107 465 ldr @C[7],[sp,#$A[3][2]+4]
aabfd329 466 ror @C[2],@C[2],#32-18 @ C[1] = ROL64(T[1][0], rhotates[1][0]);
53718107 467 ldr @C[8],[sp,#$A[4][3]]
aabfd329 468 ror @C[3],@C[3],#32-18
53718107 469 ldr @C[9],[sp,#$A[4][3]+4]
aabfd329 470 ror @C[4],@C[4],#32-5 @ C[2] = ROL64(T[1][1], rhotates[2][1]); /* originally A[2][1] */
aabfd329 471 eor @E[0],@E[0],@C[6]
53718107 472 ror @C[5],@C[5],#32-5
aabfd329
AP
473 eor @E[1],@E[1],@C[7]
474 ror @C[7],@E[0],#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
aabfd329 475 eor @C[8],@C[8],@E[2]
53718107 476 ror @C[6],@E[1],#32-8
aabfd329
AP
477 eor @C[9],@C[9],@E[3]
478 ror @C[8],@C[8],#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
aabfd329
AP
479
480 bic @E[0],@C[4],@C[2]
53718107 481 ror @C[9],@C[9],#32-28
aabfd329
AP
482 bic @E[1],@C[5],@C[3]
483 eor @E[0],@E[0],@C[0]
484 eor @E[1],@E[1],@C[1]
485 str @E[0],[sp,#$A[3][0]] @ A[3][0] = C[0] ^ (~C[1] & C[2])
486 bic @E[2],@C[6],@C[4]
487 str @E[1],[sp,#$A[3][0]+4]
488 bic @E[3],@C[7],@C[5]
489 eor @E[2],@E[2],@C[2]
490 eor @E[3],@E[3],@C[3]
491 str @E[2],[sp,#$A[3][1]] @ A[3][1] = C[1] ^ (~C[2] & C[3]);
492 bic @E[0],@C[8],@C[6]
493 str @E[3],[sp,#$A[3][1]+4]
494 bic @E[1],@C[9],@C[7]
495 eor @E[0],@E[0],@C[4]
496 eor @E[1],@E[1],@C[5]
497 str @E[0],[sp,#$A[3][2]] @ A[3][2] = C[2] ^ (~C[3] & C[4]);
498 bic @E[2],@C[0],@C[8]
499 str @E[1],[sp,#$A[3][2]+4]
500 bic @E[3],@C[1],@C[9]
501 eor @E[2],@E[2],@C[6]
502 eor @E[3],@E[3],@C[7]
503 str @E[2],[sp,#$A[3][3]] @ A[3][3] = C[3] ^ (~C[4] & C[0]);
504 bic @E[0],@C[2],@C[0]
505 str @E[3],[sp,#$A[3][3]+4]
506 bic @E[1],@C[3],@C[1]
507 eor @E[0],@E[0],@C[8]
508 eor @E[1],@E[1],@C[9]
509 str @E[0],[sp,#$A[3][4]] @ A[3][4] = C[4] ^ (~C[0] & C[1]);
53718107 510 add @E[3],sp,#$T[1][3]
aabfd329
AP
511 str @E[1],[sp,#$A[3][4]+4]
512
513 ldr @C[0],[sp,#$T[0][2]]
514 ldr @C[1],[sp,#$T[0][2]+4]
aabfd329 515 ldmia @E[3],{@E[0]-@E[2],@E[3]} @ T[1][3..4]
aabfd329 516 ldr @C[7],[sp,#$T[0][0]]
53718107 517 ror @C[0],@C[0],#32-31 @ C[0] = ROL64(T[0][2], rhotates[0][2]);
aabfd329 518 ldr @C[6],[sp,#$T[0][0]+4]
53718107 519 ror @C[1],@C[1],#32-31
aabfd329 520 ldr @C[8],[sp,#$A[4][1]]
53718107 521 ror @C[3],@E[0],#32-27 @ C[1] = ROL64(T[1][3], rhotates[1][3]);
aabfd329 522 ldr @E[0],[sp,#$D[1]]
53718107
AP
523 ror @C[2],@E[1],#32-28
524 ldr @C[9],[sp,#$A[4][1]+4]
525 ror @C[5],@E[2],#32-19 @ C[2] = ROL64(T[1][4], rhotates[2][4]); /* originally A[2][4] */
aabfd329 526 ldr @E[1],[sp,#$D[1]+4]
53718107 527 ror @C[4],@E[3],#32-20
aabfd329 528 eor @C[8],@C[8],@E[0]
53718107 529 ror @C[7],@C[7],#32-20 @ C[3] = ROL64(T[0][0], rhotates[3][0]); /* originally A[3][0] */
aabfd329 530 eor @C[9],@C[9],@E[1]
53718107 531 ror @C[6],@C[6],#32-21
aabfd329
AP
532
533 bic @E[0],@C[4],@C[2]
53718107 534 ror @C[8],@C[8],#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
aabfd329 535 bic @E[1],@C[5],@C[3]
53718107 536 ror @C[9],@C[9],#32-1
aabfd329
AP
537 eor @E[0],@E[0],@C[0]
538 eor @E[1],@E[1],@C[1]
539 str @E[0],[sp,#$A[4][0]] @ A[4][0] = C[0] ^ (~C[1] & C[2])
540 bic @E[2],@C[6],@C[4]
541 str @E[1],[sp,#$A[4][0]+4]
542 bic @E[3],@C[7],@C[5]
543 eor @E[2],@E[2],@C[2]
544 eor @E[3],@E[3],@C[3]
545 str @E[2],[sp,#$A[4][1]] @ A[4][1] = C[1] ^ (~C[2] & C[3]);
546 bic @E[0],@C[8],@C[6]
547 str @E[3],[sp,#$A[4][1]+4]
548 bic @E[1],@C[9],@C[7]
549 eor @E[0],@E[0],@C[4]
550 eor @E[1],@E[1],@C[5]
551 str @E[0],[sp,#$A[4][2]] @ A[4][2] = C[2] ^ (~C[3] & C[4]);
552 bic @E[2],@C[0],@C[8]
553 str @E[1],[sp,#$A[4][2]+4]
554 bic @E[3],@C[1],@C[9]
555 eor @E[2],@E[2],@C[6]
556 eor @E[3],@E[3],@C[7]
557 str @E[2],[sp,#$A[4][3]] @ A[4][3] = C[3] ^ (~C[4] & C[0]);
aabfd329 558 bic @E[0],@C[2],@C[0]
53718107 559 str @E[3],[sp,#$A[4][3]+4]
aabfd329
AP
560 bic @E[1],@C[3],@C[1]
561 eor @E[2],@E[0],@C[8]
aabfd329 562 eor @E[3],@E[1],@C[9]
aabfd329 563 str @E[2],[sp,#$A[4][4]] @ A[4][4] = C[4] ^ (~C[0] & C[1]);
56676f87 564 add @E[0],sp,#$A[1][0]
aabfd329
AP
565 str @E[3],[sp,#$A[4][4]+4]
566
567 blo .Lround
568
56676f87
AP
569 ldr pc,[sp,#320]
570.size KeccakF1600_int,.-KeccakF1600_int
571
572.type KeccakF1600, %function
573.align 5
574KeccakF1600:
575 stmdb sp!,{r0,r4-r11,lr}
576 sub sp,sp,#320+16 @ space for A[5][5],D[5],T[2][5],...
577
578 add @E[0],r0,#$A[1][0]
579 add @E[1],sp,#$A[1][0]
580 mov @E[2],r0
581 ldmia @E[0]!,{@C[0]-@C[9]} @ copy A[5][5] to stack
582 stmia @E[1]!,{@C[0]-@C[9]}
583 ldmia @E[0]!,{@C[0]-@C[9]}
584 stmia @E[1]!,{@C[0]-@C[9]}
585 ldmia @E[0]!,{@C[0]-@C[9]}
586 stmia @E[1]!,{@C[0]-@C[9]}
587 ldmia @E[0], {@C[0]-@C[9]}
588 stmia @E[1], {@C[0]-@C[9]}
589 ldmia @E[2], {@C[0]-@C[9]} @ A[0][0..4]
590 add @E[0],sp,#$A[1][0]
591 stmia sp, {@C[0]-@C[9]}
592
593 bl KeccakF1600_enter
594
595 ldr @E[1], [sp,#320+16] @ restore pointer to A
596 ldmia sp, {@C[0]-@C[9]}
597 stmia @E[1]!,{@C[0]-@C[9]} @ return A[5][5]
aabfd329
AP
598 ldmia @E[0]!,{@C[0]-@C[9]}
599 stmia @E[1]!,{@C[0]-@C[9]}
600 ldmia @E[0]!,{@C[0]-@C[9]}
601 stmia @E[1]!,{@C[0]-@C[9]}
602 ldmia @E[0]!,{@C[0]-@C[9]}
603 stmia @E[1]!,{@C[0]-@C[9]}
56676f87
AP
604 ldmia @E[0], {@C[0]-@C[9]}
605 stmia @E[1], {@C[0]-@C[9]}
aabfd329 606
56676f87
AP
607 add sp,sp,#320+20
608 ldmia sp!,{r4-r11,pc}
aabfd329
AP
609.size KeccakF1600,.-KeccakF1600
610___
56676f87
AP
611{ my ($hi,$lo,$i,$A_flat, $len,$bsz,$inp) = map("r$_",(5..8, 10..12));
612
# SHA3_absorb, emitted by the here-document below. Arguments as the code
# reads them:
#   r0 - pointer to the caller's A[5][5] (reloaded from sp+336 on exit)
#   r1 - input pointer (picked up from its saved slot at sp+340)
#   r2 - len
#   r3 - bsz, the block size in bytes
# The state is first copied onto the stack. While len >= bsz, one block of
# input is XORed into the stack copy lane by lane and KeccakF1600_int is
# called. On exit the stack copy is written back to A and the remaining
# length (the part smaller than bsz) is returned in r0.
#
# Lane loading: the eight bytes of a lane are consumed from inp[7] down to
# inp[0]. Each byte is moved to the top of r1 and shifted out one bit at a
# time through the carry flag, alternately into $hi and $lo, so $hi collects
# the odd-numbered bits of the lane and $lo the even-numbered ones. Neither
# register is cleared beforehand: 32 bits are shifted into each per lane,
# which pushes out whatever was there.
#
# Frame: bytes 0..335 are the scratch area shared with KeccakF1600_int
# (A, D, T, saved lr at +320, round counter at +324). The slots drawn from
# +336 upwards are r0-r3 exactly as the prologue pushed them; the epilogue
# skips them with "add sp,sp,#320+32".
613########################################################################
614# Stack layout
615# ----->+-----------------------+
616# | uint64_t A[5][5] |
617# | ... |
618# | ... |
619# +336->+-----------------------+
620# | uint64_t *A |
621# +340->+-----------------------+
622# | const void *inp |
623# +344->+-----------------------+
624# | size_t len |
625# +348->+-----------------------+
626# | size_t bs |
627# +352->+-----------------------+
628# | ....
629
630$code.=<<___;
631.global SHA3_absorb
632.type SHA3_absorb,%function
633.align 5
634SHA3_absorb:
635 stmdb sp!,{r0-r12,lr}
636 sub sp,sp,#320+16
637
638 mov r12,r0
639 add r14,sp,#0
640 mov $len,r2
641 mov $bsz,r3
642
643 ldmia r12!,{@C[0]-@C[9]} @ copy A[5][5] to stack
644 stmia r14!,{@C[0]-@C[9]}
645 ldmia r12!,{@C[0]-@C[9]}
646 stmia r14!,{@C[0]-@C[9]}
647 ldmia r12!,{@C[0]-@C[9]}
648 stmia r14!,{@C[0]-@C[9]}
649 ldmia r12!,{@C[0]-@C[9]}
650 stmia r14!,{@C[0]-@C[9]}
651 ldmia r12, {@C[0]-@C[9]}
652 stmia r14, {@C[0]-@C[9]}
653
654 ldr $inp,[sp,#340]
655
656.Loop_absorb:
657 subs r0,$len,$bsz
658 blo .Labsorbed
659 add $A_flat,sp,#0
660 str r0,[sp,#344] @ save len - bsz
661
662.Loop_block:
663 ldmia $A_flat,{r2-r3} @ A_flat[i]
664 ldrb r0,[$inp,#7]! @ inp[7]
665 mov $i,#8
666
667.Lane_loop:
668 subs $i,$i,#1
669 lsl r1,r0,#24
670 blo .Lane_done
671#ifdef __thumb2__
672 it ne
673 ldrbne r0,[$inp,#-1]!
674#else
675 ldrneb r0,[$inp,#-1]!
676#endif
677 adds r1,r1,r1 @ sip through carry flag
678 adc $hi,$hi,$hi
679 adds r1,r1,r1
680 adc $lo,$lo,$lo
681 adds r1,r1,r1
682 adc $hi,$hi,$hi
683 adds r1,r1,r1
684 adc $lo,$lo,$lo
685 adds r1,r1,r1
686 adc $hi,$hi,$hi
687 adds r1,r1,r1
688 adc $lo,$lo,$lo
689 adds r1,r1,r1
690 adc $hi,$hi,$hi
691 adds r1,r1,r1
692 adc $lo,$lo,$lo
693 b .Lane_loop
694
695.Lane_done:
696 eor r2,r2,$lo
697 eor r3,r3,$hi
698 add $inp,$inp,#8
699 stmia $A_flat!,{r2-r3} @ A_flat[i++] ^= BitInterleave(inp[0..7])
700 subs $bsz,$bsz,#8
701 bhi .Loop_block
702
703 str $inp,[sp,#340]
704
705 bl KeccakF1600_int
706
707 ldr $inp,[sp,#340]
708 ldr $len,[sp,#344]
709 ldr $bsz,[sp,#348]
710 b .Loop_absorb
711
712.align 4
713.Labsorbed:
714 add r12,sp,#$A[1][0]
715 ldr r14, [sp,#336] @ pull pointer to A[5][5]
716 ldmia sp, {@C[0]-@C[9]}
717 stmia r14!,{@C[0]-@C[9]} @ return A[5][5]
718 ldmia r12!,{@C[0]-@C[9]}
719 stmia r14!,{@C[0]-@C[9]}
720 ldmia r12!,{@C[0]-@C[9]}
721 stmia r14!,{@C[0]-@C[9]}
722 ldmia r12!,{@C[0]-@C[9]}
723 stmia r14!,{@C[0]-@C[9]}
724 ldmia r12, {@C[0]-@C[9]}
725 stmia r14, {@C[0]-@C[9]}
726
727 add sp,sp,#320+32
728 mov r0,$len @ return value
729 ldmia sp!,{r4-r12,pc}
730.size SHA3_absorb,.-SHA3_absorb
731___
732}
{ my ($A_flat,$out,$len,$bsz, $byte,$shl) = map("r$_", (4..9));

########################################################################
# SHA3_squeeze, emitted by the here-document below. Arguments as the code
# reads them:
#   r0 - pointer to the (bit-interleaved) state, kept in $A_flat and handed
#        to KeccakF1600 whenever a whole block has been consumed
#   r1 - output pointer
#   r2 - len, number of bytes to produce
#   r3 - bsz, block size in bytes
#
# r12 walks the state and r14 counts down the bytes left in the current
# block. Each lane is loaded as two words; $shl steps 28,24,...,0 so that
# the next four bits of each word sit at the top of r2/r3, and eight
# adds/adc pairs then merge them, alternating between the two words, into
# one output byte. When the block is exhausted KeccakF1600 is called and
# the walk restarts at the beginning of the state.
#
# Two things are done deliberately:
#   * the output is written with a byte store. A word store here would
#     write three bytes beyond every output byte (and therefore past the
#     end of the caller's buffer), would be an unaligned word access, and
#     would put the value in the wrong byte on big-endian targets;
#   * a zero length returns immediately. Without the check the
#     "subs len,len,#1" at the bottom of the loop wraps around and the
#     loop keeps writing.

$code.=<<___;
.global	SHA3_squeeze
.type	SHA3_squeeze,%function
.align	5
SHA3_squeeze:
	stmdb	sp!,{r4-r10,lr}
	mov	r12,r0
	mov	$A_flat,r0
	mov	$out,r1
	mov	$len,r2
	mov	$bsz,r3
	mov	r14,r3
	cmp	$len,#0
	beq	.Lsqueeze_done
	b	.Loop_squeeze

.align	4
.Loop_squeeze:
	ldmia	r12!,{r0,r1}		@ A_flat[i++]
	mov	$shl,#28

.Lane_squeeze:
	lsl	r2,r0,$shl
	lsl	r3,r1,$shl
	eor	$byte,$byte,$byte
	adds	r3,r3,r3		@ sip through carry flag
	adc	$byte,$byte,$byte
	adds	r2,r2,r2
	adc	$byte,$byte,$byte
	adds	r3,r3,r3
	adc	$byte,$byte,$byte
	adds	r2,r2,r2
	adc	$byte,$byte,$byte
	adds	r3,r3,r3
	adc	$byte,$byte,$byte
	adds	r2,r2,r2
	adc	$byte,$byte,$byte
	adds	r3,r3,r3
	adc	$byte,$byte,$byte
	adds	r2,r2,r2
	adc	$byte,$byte,$byte
	subs	$len,$len,#1		@ len -= 1
	strb	$byte,[$out],#1		@ exactly one byte per step
	beq	.Lsqueeze_done
	subs	$shl,$shl,#4
	bhs	.Lane_squeeze

	subs	r14,r14,#8		@ bsz -= 8
	bhi	.Loop_squeeze

	mov	r0,$A_flat

	bl	KeccakF1600

	mov	r12,$A_flat
	mov	r14,$bsz
	b	.Loop_squeeze

.Lsqueeze_done:
	ldmia	sp!,{r4-r10,pc}
.size	SHA3_squeeze,.-SHA3_squeeze
.asciz	"Keccak-1600 absorb and squeeze for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
.align	2
___
}
798
# Emit the accumulated assembly text on standard output; the caller is
# expected to redirect it into the target file.
print $code;

# Closing STDOUT forces the final flush. If that fails (full disk, broken
# pipe, ...) the generated file is truncated, so abort with a non-zero
# status instead of letting the build go on with a damaged file.
close STDOUT or die "error closing STDOUT: $!";