]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/sha/asm/sha512-armv4.pl
SHA512 for ARMv4.
[thirdparty/openssl.git] / crypto / sha / asm / sha512-armv4.pl
CommitLineData
1fa29843
AP
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# SHA512 block procedure for ARMv4. September 2007.
11
12# This code is ~4.5 (four and a half) times faster than code generated
13# by gcc 3.4 and it spends ~72 clock cycles per byte.
14
15# This module currently has dependency on byte order, namely *dword*
16# order in ctx->h[0-9]. I have to think of a way to reliably detect
17# "endianness" [and flip below two constants] or arrange given dword
18# order in C.
# Byte offsets of the low and high 32-bit halves inside each 64-bit word
# of the hash state addressed through $ctx.  These values denote a
# little-endian platform.
($lo,$hi)=(0,4);

# Register allocation: r0..r12 are handed out in order, r14 holds the
# K512 pointer.  r13 (stack pointer) and r15 (program counter) are not
# available for allocation.
($ctx,$inp,$len,$Tlo,$Thi,$Alo,$Ahi,$Elo,$Ehi,$t0,$t1,$t2,$t3)
	=map("r$_",0..12);
$Ktbl="r14";

# Byte offsets of the 64-bit slots a..h and X.  They index the stack frame,
# and the a..h offsets are also used to address the hash state via $ctx.
($Aoff,$Boff,$Coff,$Doff,$Eoff,$Foff,$Goff,$Hoff,$Xoff)
	=map(8*$_,0..8);
48
# BODY_00_15($magic) - append the assembly for one SHA-512 round to $code.
#
# On entry to the emitted code $Tlo/$Thi hold the message word X[i],
# $Alo/$Ahi hold a, $Elo/$Ehi hold e, and b,c,d,f,g,h live in the stack
# frame at their $?off slots.  The emitted code computes
#
#	T += Sigma1(e) + h + Ch(e,f,g) + K[i]
#	d += T			(result left in $Elo/$Ehi, i.e. the new e)
#	T += Sigma0(a)
#	a  = Maj(a,b,c) + T	(result left in $Alo/$Ahi, i.e. the new a)
#
# The old e and a are written to their frame slots first, and sp is then
# lowered by 8 so that every saved variable appears one slot further along
# in the next round (a is read back as b, e as f, and so on).  $Ktbl is
# advanced by 8 to the next constant.
#
# $magic is the low byte of the low word of the last K512 entry the calling
# loop should consume.  When the constant just loaded matches it, bit 0 of
# $Ktbl is set; the caller tests and clears that bit to leave its loop.
#
# The sub takes exactly one argument, so it is declared without a
# prototype: the former empty prototype "()" was only harmless because the
# call sites use the "&" form, which bypasses prototype checking.
sub BODY_00_15 {
my ($magic) = @_;
$code.=<<___;
 ldr $t2,[sp,#$Hoff+0] @ h.lo
 ldr $t3,[sp,#$Hoff+4] @ h.hi
 @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
 @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
 @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
 mov $t0,$Elo,lsr#14
 mov $t1,$Ehi,lsr#14
 eor $t0,$t0,$Ehi,lsl#18
 eor $t1,$t1,$Elo,lsl#18
 eor $t0,$t0,$Elo,lsr#18
 eor $t1,$t1,$Ehi,lsr#18
 eor $t0,$t0,$Ehi,lsl#14
 eor $t1,$t1,$Elo,lsl#14
 eor $t0,$t0,$Ehi,lsr#9
 eor $t1,$t1,$Elo,lsr#9
 eor $t0,$t0,$Elo,lsl#23
 eor $t1,$t1,$Ehi,lsl#23 @ Sigma1(e)
 adds $Tlo,$Tlo,$t0
 adc $Thi,$Thi,$t1 @ T += Sigma1(e)
 adds $Tlo,$Tlo,$t2
 adc $Thi,$Thi,$t3 @ T += h

 ldr $t0,[sp,#$Foff+0] @ f.lo
 ldr $t1,[sp,#$Foff+4] @ f.hi
 ldr $t2,[sp,#$Goff+0] @ g.lo
 ldr $t3,[sp,#$Goff+4] @ g.hi
 str $Elo,[sp,#$Eoff+0]
 str $Ehi,[sp,#$Eoff+4]
 str $Alo,[sp,#$Aoff+0]
 str $Ahi,[sp,#$Aoff+4]

 eor $t0,$t0,$t2
 eor $t1,$t1,$t3
 and $t0,$t0,$Elo
 and $t1,$t1,$Ehi
 eor $t0,$t0,$t2
 eor $t1,$t1,$t3 @ Ch(e,f,g)

 ldr $t2,[$Ktbl,#4] @ K[i].lo
 ldr $t3,[$Ktbl,#0] @ K[i].hi
 ldr $Elo,[sp,#$Doff+0] @ d.lo
 ldr $Ehi,[sp,#$Doff+4] @ d.hi

 adds $Tlo,$Tlo,$t0
 adc $Thi,$Thi,$t1 @ T += Ch(e,f,g)
 adds $Tlo,$Tlo,$t2
 adc $Thi,$Thi,$t3 @ T += K[i]
 adds $Elo,$Elo,$Tlo
 adc $Ehi,$Ehi,$Thi @ d += T

 and $t0,$t2,#0xff
 teq $t0,#$magic
 orreq $Ktbl,$Ktbl,#1

 ldr $t2,[sp,#$Boff+0] @ b.lo
 ldr $t3,[sp,#$Coff+0] @ c.lo
 @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
 @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
 @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
 mov $t0,$Alo,lsr#28
 mov $t1,$Ahi,lsr#28
 eor $t0,$t0,$Ahi,lsl#4
 eor $t1,$t1,$Alo,lsl#4
 eor $t0,$t0,$Ahi,lsr#2
 eor $t1,$t1,$Alo,lsr#2
 eor $t0,$t0,$Alo,lsl#30
 eor $t1,$t1,$Ahi,lsl#30
 eor $t0,$t0,$Ahi,lsr#7
 eor $t1,$t1,$Alo,lsr#7
 eor $t0,$t0,$Alo,lsl#25
 eor $t1,$t1,$Ahi,lsl#25 @ Sigma0(a)
 adds $Tlo,$Tlo,$t0
 adc $Thi,$Thi,$t1 @ T += Sigma0(a)

 and $t0,$Alo,$t2
 orr $Alo,$Alo,$t2
 ldr $t1,[sp,#$Boff+4] @ b.hi
 ldr $t2,[sp,#$Coff+4] @ c.hi
 and $Alo,$Alo,$t3
 orr $Alo,$Alo,$t0 @ Maj(a,b,c).lo
 and $t3,$Ahi,$t1
 orr $Ahi,$Ahi,$t1
 and $Ahi,$Ahi,$t2
 orr $Ahi,$Ahi,$t3 @ Maj(a,b,c).hi
 adds $Alo,$Alo,$Tlo
 adc $Ahi,$Ahi,$Thi @ h += T

 sub sp,sp,#8
 add $Ktbl,$Ktbl,#8
___
}
# Start the generated assembly text.
#
# K512 is the table of 80 64-bit round constants, each emitted as two
# 32-bit words with the high word first.  It is aligned with .align 5 and
# sha512_block_data_order is emitted directly after it: the entry code
# takes its own address from pc and subtracts 640 to obtain the table
# pointer in $Ktbl.
#
# Entry code: $len is turned into an end-of-input pointer ($inp plus $len
# shifted left by 7), r4-r12 and lr are pushed, and a 9*8-byte frame is
# reserved for the slots a..h plus one X word.  e, g and h are loaded from
# $ctx before .Loop; inside .Loop g and h are stored to the frame, a is
# loaded into $Alo/$Ahi, and b, c, d and f are copied from $ctx to the
# frame.  e stays in $Elo/$Ehi.
#
# .L00_15 gathers eight input bytes into $Thi:$Tlo with byte 0 as the most
# significant byte, advances $inp by 8, and stores the word in the frame's
# X slot.
143$code=<<___;
144.text
145.code 32
146.type K512,%object
147.align 5
148K512:
149.word 0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd
150.word 0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc
151.word 0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019
152.word 0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118
153.word 0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe
154.word 0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2
155.word 0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1
156.word 0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694
157.word 0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3
158.word 0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65
159.word 0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483
160.word 0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5
161.word 0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210
162.word 0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4
163.word 0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725
164.word 0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70
165.word 0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926
166.word 0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df
167.word 0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8
168.word 0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b
169.word 0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001
170.word 0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30
171.word 0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910
172.word 0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8
173.word 0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53
174.word 0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8
175.word 0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb
176.word 0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3
177.word 0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60
178.word 0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec
179.word 0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9
180.word 0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b
181.word 0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207
182.word 0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178
183.word 0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6
184.word 0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b
185.word 0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493
186.word 0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c
187.word 0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a
188.word 0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817
189.size K512,.-K512
190
191.global sha512_block_data_order
192.type sha512_block_data_order,%function
193sha512_block_data_order:
194 sub r3,pc,#8 @ sha512_block_data_order
195 add $len,$inp,$len,lsl#7 @ len to point at the end of inp
196 stmdb sp!,{r4-r12,lr}
197 sub $Ktbl,r3,#640 @ K512
198 sub sp,sp,#9*8
199
200 ldr $Elo,[$ctx,#$Eoff+$lo]
201 ldr $Ehi,[$ctx,#$Eoff+$hi]
202 ldr $t0, [$ctx,#$Goff+$lo]
203 ldr $t1, [$ctx,#$Goff+$hi]
204 ldr $t2, [$ctx,#$Hoff+$lo]
205 ldr $t3, [$ctx,#$Hoff+$hi]
206.Loop:
207 str $t0, [sp,#$Goff+0]
208 str $t1, [sp,#$Goff+4]
209 str $t2, [sp,#$Hoff+0]
210 str $t3, [sp,#$Hoff+4]
211 ldr $Alo,[$ctx,#$Aoff+$lo]
212 ldr $Ahi,[$ctx,#$Aoff+$hi]
213 ldr $Tlo,[$ctx,#$Boff+$lo]
214 ldr $Thi,[$ctx,#$Boff+$hi]
215 ldr $t0, [$ctx,#$Coff+$lo]
216 ldr $t1, [$ctx,#$Coff+$hi]
217 ldr $t2, [$ctx,#$Doff+$lo]
218 ldr $t3, [$ctx,#$Doff+$hi]
219 str $Tlo,[sp,#$Boff+0]
220 str $Thi,[sp,#$Boff+4]
221 str $t0, [sp,#$Coff+0]
222 str $t1, [sp,#$Coff+4]
223 str $t2, [sp,#$Doff+0]
224 str $t3, [sp,#$Doff+4]
225 ldr $Tlo,[$ctx,#$Foff+$lo]
226 ldr $Thi,[$ctx,#$Foff+$hi]
227 str $Tlo,[sp,#$Foff+0]
228 str $Thi,[sp,#$Foff+4]
229
230.L00_15:
231 ldrb $Tlo,[$inp,#7]
232 ldrb $t0, [$inp,#6]
233 ldrb $t1, [$inp,#5]
234 ldrb $t2, [$inp,#4]
235 ldrb $Thi,[$inp,#3]
236 ldrb $t3, [$inp,#2]
237 orr $Tlo,$Tlo,$t0,lsl#8
238 ldrb $t0, [$inp,#1]
239 orr $Tlo,$Tlo,$t1,lsl#16
240 ldrb $t1, [$inp],#8
241 orr $Tlo,$Tlo,$t2,lsl#24
242 orr $Thi,$Thi,$t3,lsl#8
243 orr $Thi,$Thi,$t0,lsl#16
244 orr $Thi,$Thi,$t1,lsl#24
245 str $Tlo,[sp,#$Xoff+0]
246 str $Thi,[sp,#$Xoff+4]
247___
# Round body for the .L00_15 loop.  0x94 is the low byte of the low word of
# the 16th K512 entry (0xc19bf174,0xcf692694); BODY_00_15 emits code that
# sets bit 0 of $Ktbl when the constant just used has that low byte.
248 &BODY_00_15(0x94);
# Tail of the .L00_15 loop: repeat until the flag bit in $Ktbl is set, then
# clear it.
#
# .L16_79 then computes the next message-schedule word
#	X[i] = sigma0(X[i-15]) + sigma1(X[i-2]) + X[i-7] + X[i-16]
# and stores it in the frame's X slot.  BODY_00_15 lowers sp by 8 every
# round, so the word stored k rounds ago is found at $Xoff+8*k; the offsets
# below are written as 8*(16-1), 8*(16-14), 8*(16-9) and 8*16.  The
# backticked expressions are evaluated by the substitution applied to $code
# at the end of the script.
249$code.=<<___;
250 tst $Ktbl,#1
251 beq .L00_15
252 bic $Ktbl,$Ktbl,#1
253
254.L16_79:
255 ldr $t0,[sp,#`$Xoff+8*(16-1)`+0]
256 ldr $t1,[sp,#`$Xoff+8*(16-1)`+4]
257 ldr $t2,[sp,#`$Xoff+8*(16-14)`+0]
258 ldr $t3,[sp,#`$Xoff+8*(16-14)`+4]
259
260 @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
261 @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
262 @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7
263 mov $Tlo,$t0,lsr#1
264 mov $Thi,$t1,lsr#1
265 eor $Tlo,$Tlo,$t1,lsl#31
266 eor $Thi,$Thi,$t0,lsl#31
267 eor $Tlo,$Tlo,$t0,lsr#8
268 eor $Thi,$Thi,$t1,lsr#8
269 eor $Tlo,$Tlo,$t1,lsl#24
270 eor $Thi,$Thi,$t0,lsl#24
271 eor $Tlo,$Tlo,$t0,lsr#7
272 eor $Thi,$Thi,$t1,lsr#7
273 eor $Tlo,$Tlo,$t1,lsl#25
274
275 @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
276 @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
277 @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
278 mov $t0,$t2,lsr#19
279 mov $t1,$t3,lsr#19
280 eor $t0,$t0,$t3,lsl#13
281 eor $t1,$t1,$t2,lsl#13
282 eor $t0,$t0,$t3,lsr#29
283 eor $t1,$t1,$t2,lsr#29
284 eor $t0,$t0,$t2,lsl#3
285 eor $t1,$t1,$t3,lsl#3
286 eor $t0,$t0,$t2,lsr#6
287 eor $t1,$t1,$t3,lsr#6
288 eor $t0,$t0,$t3,lsl#26
289
290 ldr $t2,[sp,#`$Xoff+8*(16-9)`+0]
291 ldr $t3,[sp,#`$Xoff+8*(16-9)`+4]
292 adds $Tlo,$Tlo,$t0
293 adc $Thi,$Thi,$t1
294
295 ldr $t0,[sp,#`$Xoff+8*16`+0]
296 ldr $t1,[sp,#`$Xoff+8*16`+4]
297 adds $Tlo,$Tlo,$t2
298 adc $Thi,$Thi,$t3
299 adds $Tlo,$Tlo,$t0
300 adc $Thi,$Thi,$t1
301 str $Tlo,[sp,#$Xoff+0]
302 str $Thi,[sp,#$Xoff+4]
303___
# Round body for the .L16_79 loop.  0x17 is the low byte of the low word of
# the final K512 entry (0x6c44198c,0x4a475817), so the flag bit in $Ktbl is
# set in the round that uses the last constant.
304 &BODY_00_15(0x17);
# Tail of the .L16_79 loop, followed by the per-block epilogue.
#
# The working variables are added into the hash state in $ctx two at a
# time: a and e come from $Alo/$Ahi and $Elo/$Ehi, the others are read back
# from the stack frame.  The updated e, g and h are left in $Elo/$Ehi,
# $t0/$t1 and $t2/$t3, which is where .Loop expects them.
#
# sp and $Ktbl are then moved back by 640 bytes (80 rounds of 8 bytes each)
# and the code branches to .Loop until $inp equals the end pointer in $len.
#
# Return: the 9*8-byte frame is released and r4-r12,lr are popped.  If bit 0
# of lr is clear the function returns with "moveq pc,lr"; otherwise it
# returns with "bx lr".
305$code.=<<___;
306 tst $Ktbl,#1
307 beq .L16_79
308 bic $Ktbl,$Ktbl,#1
309
310 ldr $Tlo,[sp,#$Boff+0]
311 ldr $Thi,[sp,#$Boff+4]
312 ldr $t0, [$ctx,#$Aoff+$lo]
313 ldr $t1, [$ctx,#$Aoff+$hi]
314 ldr $t2, [$ctx,#$Boff+$lo]
315 ldr $t3, [$ctx,#$Boff+$hi]
316 adds $t0,$Alo,$t0
317 adc $t1,$Ahi,$t1
318 adds $t2,$Tlo,$t2
319 adc $t3,$Thi,$t3
320 str $t0, [$ctx,#$Aoff+$lo]
321 str $t1, [$ctx,#$Aoff+$hi]
322 str $t2, [$ctx,#$Boff+$lo]
323 str $t3, [$ctx,#$Boff+$hi]
324
325 ldr $Alo,[sp,#$Coff+0]
326 ldr $Ahi,[sp,#$Coff+4]
327 ldr $Tlo,[sp,#$Doff+0]
328 ldr $Thi,[sp,#$Doff+4]
329 ldr $t0, [$ctx,#$Coff+$lo]
330 ldr $t1, [$ctx,#$Coff+$hi]
331 ldr $t2, [$ctx,#$Doff+$lo]
332 ldr $t3, [$ctx,#$Doff+$hi]
333 adds $t0,$Alo,$t0
334 adc $t1,$Ahi,$t1
335 adds $t2,$Tlo,$t2
336 adc $t3,$Thi,$t3
337 str $t0, [$ctx,#$Coff+$lo]
338 str $t1, [$ctx,#$Coff+$hi]
339 str $t2, [$ctx,#$Doff+$lo]
340 str $t3, [$ctx,#$Doff+$hi]
341
342 ldr $Tlo,[sp,#$Foff+0]
343 ldr $Thi,[sp,#$Foff+4]
344 ldr $t0, [$ctx,#$Eoff+$lo]
345 ldr $t1, [$ctx,#$Eoff+$hi]
346 ldr $t2, [$ctx,#$Foff+$lo]
347 ldr $t3, [$ctx,#$Foff+$hi]
348 adds $Elo,$Elo,$t0
349 adc $Ehi,$Ehi,$t1
350 adds $t2,$Tlo,$t2
351 adc $t3,$Thi,$t3
352 str $Elo,[$ctx,#$Eoff+$lo]
353 str $Ehi,[$ctx,#$Eoff+$hi]
354 str $t2, [$ctx,#$Foff+$lo]
355 str $t3, [$ctx,#$Foff+$hi]
356
357 ldr $Alo,[sp,#$Goff+0]
358 ldr $Ahi,[sp,#$Goff+4]
359 ldr $Tlo,[sp,#$Hoff+0]
360 ldr $Thi,[sp,#$Hoff+4]
361 ldr $t0, [$ctx,#$Goff+$lo]
362 ldr $t1, [$ctx,#$Goff+$hi]
363 ldr $t2, [$ctx,#$Hoff+$lo]
364 ldr $t3, [$ctx,#$Hoff+$hi]
365 adds $t0,$Alo,$t0
366 adc $t1,$Ahi,$t1
367 adds $t2,$Tlo,$t2
368 adc $t3,$Thi,$t3
369 str $t0, [$ctx,#$Goff+$lo]
370 str $t1, [$ctx,#$Goff+$hi]
371 str $t2, [$ctx,#$Hoff+$lo]
372 str $t3, [$ctx,#$Hoff+$hi]
373
374 add sp,sp,#640
375 sub $Ktbl,$Ktbl,#640
376
377 teq $inp,$len
378 bne .Loop
379
380 add sp,sp,#8*9 @ destroy frame
381 ldmia sp!,{r4-r12,lr}
382 tst lr,#1
383 moveq pc,lr @ be binary compatible with V4, yet
384 bx lr @ interoperable with Thumb ISA:-)
385.size sha512_block_data_order,.-sha512_block_data_order
386.asciz "SHA512 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
387___
388
# _postprocess_asm($asm) - return the final assembly text for $asm.
#
# 1. Each `...` span is replaced by the value of the Perl expression inside
#    it (the generated code uses this for computed stack offsets).
# 2. "bx lr" is replaced by ".word 0xe12fff1e", the machine encoding of that
#    same instruction.  The generated code only reaches it when bit 0 of lr
#    is set, but an assembler targeting plain ARMv4 rejects the BX mnemonic
#    outright; emitting the raw word keeps the output assemblable there
#    without changing the instruction bytes.
sub _postprocess_asm {
	my ($asm) = @_;
	$asm =~ s/\`([^\`]*)\`/eval $1/gem;
	$asm =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;
	return $asm;
}

$code = _postprocess_asm($code);
print $code;