]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/sha/asm/sha512-armv4.pl
Move -march=armv4t to ./config.
[thirdparty/openssl.git] / crypto / sha / asm / sha512-armv4.pl
CommitLineData
1fa29843
AP
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# SHA512 block procedure for ARMv4. September 2007.
11
12# This code is ~4.5 (four and a half) times faster than code generated
13# by gcc 3.4 and it spends ~72 clock cycles per byte.
14
74eb3e09
AP
15# Byte order [in]dependence. =========================================
16#
17# Caller is expected to maintain specific *dword* order in h[0-7],
18# namely with most significant dword at *lower* address, which is
19# reflected in below two parameters. *Byte* order within these dwords
20# in turn is whatever *native* byte order on current platform.
# Byte offsets of the most ($hi) and least ($lo) significant 32-bit half
# inside each 64-bit h[] element, per the dword order described above.
($hi,$lo)=(0,4);
23# ====================================================================
1fa29843 24
4c7c5ff6
AP
# The first command-line argument, if present, names the file that receives
# the generated assembly.  Without an argument nothing is reopened and the
# output goes to the inherited STDOUT, so "perl script.pl > file.S" keeps
# working.
$output=shift;
if (defined($output) && length($output)) {
    # Three-argument open keeps characters in the file name from being
    # interpreted as part of the mode, and a failed open is fatal instead of
    # silently leaving the assembly on the original STDOUT.
    open(STDOUT,'>',$output) or die "can't open $output: $!";
}
27
1fa29843
AP
# Register allocation.  $ctx, $inp and $len are used by the function below
# without being loaded first, i.e. they are its incoming arguments in r0-r2;
# r3-r12 are working registers.
($ctx,$inp,$len,$Tlo,$Thi,$Alo,$Ahi,$Elo,$Ehi,$t0,$t1,$t2,$t3)=map("r$_",(0..12));
############ r13 is stack pointer
$Ktbl="r14";
############ r15 is program counter

# Byte offsets, 8 bytes apart, of the working variables a..h and of the
# current message-schedule word X within the stack frame.
($Aoff,$Boff,$Coff,$Doff,$Eoff,$Foff,$Goff,$Hoff,$Xoff)=map(8*$_,(0..8));
54
# BODY_00_15(magic)
#
# Appends the assembly for one SHA-512 round to the global $code.
#
# State expected by the emitted code:
#   $Thi:$Tlo  the message word X[i] for this round (T accumulator)
#   $Ahi:$Alo  working variable a;  $Ehi:$Elo  working variable e
#   b,c,d,f,g,h are read from the stack frame at $Boff..$Hoff
#   $Ktbl      points at the current K512 entry (hi word first)
#
# The emitted code computes T = X[i] + h + Sigma1(e) + Ch(e,f,g) + K[i],
# leaves d+T in $Ehi:$Elo and T + Sigma0(a) + Maj(a,b,c) in $Ahi:$Alo, after
# storing the old a and e in the frame.  It then moves sp down by 8, so the
# slots just written are addressed as b and f by the next round, and
# advances $Ktbl by 8.
#
# magic: low byte of K[i].lo for the last round of the enclosing loop.  When
#        the constant just loaded has that low byte, bit 0 of $Ktbl is set;
#        the caller tests and clears that bit to terminate its loop.
#
# No prototype is declared: the sub takes one argument, and the "()"
# prototype it used to carry only worked because callers invoke it with "&".
sub BODY_00_15 {
my ($magic) = @_;
$code.=<<___;
 ldr $t2,[sp,#$Hoff+0] @ h.lo
 ldr $t3,[sp,#$Hoff+4] @ h.hi
 @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
 @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
 @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
 mov $t0,$Elo,lsr#14
 mov $t1,$Ehi,lsr#14
 eor $t0,$t0,$Ehi,lsl#18
 eor $t1,$t1,$Elo,lsl#18
 eor $t0,$t0,$Elo,lsr#18
 eor $t1,$t1,$Ehi,lsr#18
 eor $t0,$t0,$Ehi,lsl#14
 eor $t1,$t1,$Elo,lsl#14
 eor $t0,$t0,$Ehi,lsr#9
 eor $t1,$t1,$Elo,lsr#9
 eor $t0,$t0,$Elo,lsl#23
 eor $t1,$t1,$Ehi,lsl#23 @ Sigma1(e)
 adds $Tlo,$Tlo,$t0
 adc $Thi,$Thi,$t1 @ T += Sigma1(e)
 adds $Tlo,$Tlo,$t2
 adc $Thi,$Thi,$t3 @ T += h

 ldr $t0,[sp,#$Foff+0] @ f.lo
 ldr $t1,[sp,#$Foff+4] @ f.hi
 ldr $t2,[sp,#$Goff+0] @ g.lo
 ldr $t3,[sp,#$Goff+4] @ g.hi
 str $Elo,[sp,#$Eoff+0]
 str $Ehi,[sp,#$Eoff+4]
 str $Alo,[sp,#$Aoff+0]
 str $Ahi,[sp,#$Aoff+4]

 eor $t0,$t0,$t2
 eor $t1,$t1,$t3
 and $t0,$t0,$Elo
 and $t1,$t1,$Ehi
 eor $t0,$t0,$t2
 eor $t1,$t1,$t3 @ Ch(e,f,g)

 ldr $t2,[$Ktbl,#4] @ K[i].lo
 ldr $t3,[$Ktbl,#0] @ K[i].hi
 ldr $Elo,[sp,#$Doff+0] @ d.lo
 ldr $Ehi,[sp,#$Doff+4] @ d.hi

 adds $Tlo,$Tlo,$t0
 adc $Thi,$Thi,$t1 @ T += Ch(e,f,g)
 adds $Tlo,$Tlo,$t2
 adc $Thi,$Thi,$t3 @ T += K[i]
 adds $Elo,$Elo,$Tlo
 adc $Ehi,$Ehi,$Thi @ d += T

 and $t0,$t2,#0xff
 teq $t0,#$magic
 orreq $Ktbl,$Ktbl,#1

 ldr $t2,[sp,#$Boff+0] @ b.lo
 ldr $t3,[sp,#$Coff+0] @ c.lo
 @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
 @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
 @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
 mov $t0,$Alo,lsr#28
 mov $t1,$Ahi,lsr#28
 eor $t0,$t0,$Ahi,lsl#4
 eor $t1,$t1,$Alo,lsl#4
 eor $t0,$t0,$Ahi,lsr#2
 eor $t1,$t1,$Alo,lsr#2
 eor $t0,$t0,$Alo,lsl#30
 eor $t1,$t1,$Ahi,lsl#30
 eor $t0,$t0,$Ahi,lsr#7
 eor $t1,$t1,$Alo,lsr#7
 eor $t0,$t0,$Alo,lsl#25
 eor $t1,$t1,$Ahi,lsl#25 @ Sigma0(a)
 adds $Tlo,$Tlo,$t0
 adc $Thi,$Thi,$t1 @ T += Sigma0(a)

 and $t0,$Alo,$t2
 orr $Alo,$Alo,$t2
 ldr $t1,[sp,#$Boff+4] @ b.hi
 ldr $t2,[sp,#$Coff+4] @ c.hi
 and $Alo,$Alo,$t3
 orr $Alo,$Alo,$t0 @ Maj(a,b,c).lo
 and $t3,$Ahi,$t1
 orr $Ahi,$Ahi,$t1
 and $Ahi,$Ahi,$t2
 orr $Ahi,$Ahi,$t3 @ Maj(a,b,c).hi
 adds $Alo,$Alo,$Tlo
 adc $Ahi,$Ahi,$Thi @ h += T

 sub sp,sp,#8
 add $Ktbl,$Ktbl,#8
___
}
# Start of the generated assembly text.  In order, this heredoc emits:
#  - K512: 80 64-bit round constants, each written as a hi,lo pair of .word
#    values (80 * 8 = 640 bytes);
#  - the sha512_block_data_order prologue: $len becomes $inp + ($len << 7),
#    i.e. an end-of-input pointer, r4-r12 and lr are saved, $Ktbl is set to
#    (address of the function) - 640, which relies on the 640-byte table
#    sitting immediately in front of the function, and a 9*8-byte frame is
#    reserved;
#  - .Loop: the hash state is read from $ctx using the $hi/$lo dword
#    offsets; a and e stay in registers, b,c,d,f,g,h are copied to the frame;
#  - .L00_15: eight input bytes are combined, first byte most significant,
#    into $Thi:$Tlo and stored in the frame's X slot; $inp advances by 8.
# NOTE(review): the leading digits (and the stray hashes/initials elsewhere
# in this file) look like residue from a git-blame page rather than part of
# the script -- confirm and strip them before trying to run this.
149$code=<<___;
150.text
151.code 32
152.type K512,%object
153.align 5
154K512:
155.word 0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd
156.word 0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc
157.word 0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019
158.word 0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118
159.word 0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe
160.word 0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2
161.word 0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1
162.word 0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694
163.word 0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3
164.word 0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65
165.word 0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483
166.word 0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5
167.word 0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210
168.word 0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4
169.word 0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725
170.word 0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70
171.word 0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926
172.word 0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df
173.word 0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8
174.word 0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b
175.word 0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001
176.word 0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30
177.word 0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910
178.word 0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8
179.word 0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53
180.word 0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8
181.word 0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb
182.word 0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3
183.word 0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60
184.word 0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec
185.word 0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9
186.word 0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b
187.word 0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207
188.word 0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178
189.word 0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6
190.word 0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b
191.word 0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493
192.word 0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c
193.word 0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a
194.word 0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817
195.size K512,.-K512
196
197.global sha512_block_data_order
198.type sha512_block_data_order,%function
199sha512_block_data_order:
200 sub r3,pc,#8 @ sha512_block_data_order
201 add $len,$inp,$len,lsl#7 @ len to point at the end of inp
202 stmdb sp!,{r4-r12,lr}
203 sub $Ktbl,r3,#640 @ K512
204 sub sp,sp,#9*8
205
206 ldr $Elo,[$ctx,#$Eoff+$lo]
207 ldr $Ehi,[$ctx,#$Eoff+$hi]
208 ldr $t0, [$ctx,#$Goff+$lo]
209 ldr $t1, [$ctx,#$Goff+$hi]
210 ldr $t2, [$ctx,#$Hoff+$lo]
211 ldr $t3, [$ctx,#$Hoff+$hi]
212.Loop:
213 str $t0, [sp,#$Goff+0]
214 str $t1, [sp,#$Goff+4]
215 str $t2, [sp,#$Hoff+0]
216 str $t3, [sp,#$Hoff+4]
217 ldr $Alo,[$ctx,#$Aoff+$lo]
218 ldr $Ahi,[$ctx,#$Aoff+$hi]
219 ldr $Tlo,[$ctx,#$Boff+$lo]
220 ldr $Thi,[$ctx,#$Boff+$hi]
221 ldr $t0, [$ctx,#$Coff+$lo]
222 ldr $t1, [$ctx,#$Coff+$hi]
223 ldr $t2, [$ctx,#$Doff+$lo]
224 ldr $t3, [$ctx,#$Doff+$hi]
225 str $Tlo,[sp,#$Boff+0]
226 str $Thi,[sp,#$Boff+4]
227 str $t0, [sp,#$Coff+0]
228 str $t1, [sp,#$Coff+4]
229 str $t2, [sp,#$Doff+0]
230 str $t3, [sp,#$Doff+4]
231 ldr $Tlo,[$ctx,#$Foff+$lo]
232 ldr $Thi,[$ctx,#$Foff+$hi]
233 str $Tlo,[sp,#$Foff+0]
234 str $Thi,[sp,#$Foff+4]
235
236.L00_15:
237 ldrb $Tlo,[$inp,#7]
238 ldrb $t0, [$inp,#6]
239 ldrb $t1, [$inp,#5]
240 ldrb $t2, [$inp,#4]
241 ldrb $Thi,[$inp,#3]
242 ldrb $t3, [$inp,#2]
243 orr $Tlo,$Tlo,$t0,lsl#8
244 ldrb $t0, [$inp,#1]
245 orr $Tlo,$Tlo,$t1,lsl#16
246 ldrb $t1, [$inp],#8
247 orr $Tlo,$Tlo,$t2,lsl#24
248 orr $Thi,$Thi,$t3,lsl#8
249 orr $Thi,$Thi,$t0,lsl#16
250 orr $Thi,$Thi,$t1,lsl#24
251 str $Tlo,[sp,#$Xoff+0]
252 str $Thi,[sp,#$Xoff+4]
253___
# Round body for rounds 0..15 (executed once per pass through .L00_15).
# 0x94 is the low byte of the low word of the 16th K512 entry (0xcf692694);
# the round body sets bit 0 of $Ktbl when it has just loaded a constant with
# that low byte.
254 &BODY_00_15(0x94);
# Loop tail for rounds 0..15: repeat until the round body has set bit 0 of
# $Ktbl, then clear the bit.  After that comes .L16_79, which computes
#   X[i] = sigma0(X[i-15]) + sigma1(X[i-2]) + X[i-7] + X[i-16]
# into $Thi:$Tlo and stores it in the frame's X slot.  Because every round
# lowers sp by 8, X[i-j] is found at frame offset $Xoff+8*j; the backtick
# expressions holding those offsets are evaluated by the s///e substitution
# applied to $code at the end of the script.
255$code.=<<___;
256 tst $Ktbl,#1
257 beq .L00_15
258 bic $Ktbl,$Ktbl,#1
259
260.L16_79:
261 ldr $t0,[sp,#`$Xoff+8*(16-1)`+0]
262 ldr $t1,[sp,#`$Xoff+8*(16-1)`+4]
263 ldr $t2,[sp,#`$Xoff+8*(16-14)`+0]
264 ldr $t3,[sp,#`$Xoff+8*(16-14)`+4]
265
266 @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
267 @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
268 @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7
269 mov $Tlo,$t0,lsr#1
270 mov $Thi,$t1,lsr#1
271 eor $Tlo,$Tlo,$t1,lsl#31
272 eor $Thi,$Thi,$t0,lsl#31
273 eor $Tlo,$Tlo,$t0,lsr#8
274 eor $Thi,$Thi,$t1,lsr#8
275 eor $Tlo,$Tlo,$t1,lsl#24
276 eor $Thi,$Thi,$t0,lsl#24
277 eor $Tlo,$Tlo,$t0,lsr#7
278 eor $Thi,$Thi,$t1,lsr#7
279 eor $Tlo,$Tlo,$t1,lsl#25
280
281 @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
282 @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
283 @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
284 mov $t0,$t2,lsr#19
285 mov $t1,$t3,lsr#19
286 eor $t0,$t0,$t3,lsl#13
287 eor $t1,$t1,$t2,lsl#13
288 eor $t0,$t0,$t3,lsr#29
289 eor $t1,$t1,$t2,lsr#29
290 eor $t0,$t0,$t2,lsl#3
291 eor $t1,$t1,$t3,lsl#3
292 eor $t0,$t0,$t2,lsr#6
293 eor $t1,$t1,$t3,lsr#6
294 eor $t0,$t0,$t3,lsl#26
295
296 ldr $t2,[sp,#`$Xoff+8*(16-9)`+0]
297 ldr $t3,[sp,#`$Xoff+8*(16-9)`+4]
298 adds $Tlo,$Tlo,$t0
299 adc $Thi,$Thi,$t1
300
301 ldr $t0,[sp,#`$Xoff+8*16`+0]
302 ldr $t1,[sp,#`$Xoff+8*16`+4]
303 adds $Tlo,$Tlo,$t2
304 adc $Thi,$Thi,$t3
305 adds $Tlo,$Tlo,$t0
306 adc $Thi,$Thi,$t1
307 str $Tlo,[sp,#$Xoff+0]
308 str $Thi,[sp,#$Xoff+4]
309___
# Rounds 16..79 reuse the same round body.  0x17 is the low byte of the low
# word of the last K512 entry (0x4a475817), so bit 0 of $Ktbl gets set in
# the round that consumes the final constant.
310 &BODY_00_15(0x17);
# Loop tail for rounds 16..79, followed by the per-block epilogue:
#  - the working variables (a and e from registers, the others from the
#    frame) are added to the hash state in $ctx and written back; the
#    registers holding the new e, g and h are the ones .Loop reads when it
#    starts the next block;
#  - sp is raised by 640 = 80 rounds * 8 bytes to undo the per-round
#    "sub sp,sp,#8", and $Ktbl is rewound by the same 640 bytes to K512;
#  - the block loop repeats until $inp equals the end pointer in $len;
#  - the frame is released, r4-r12 and lr are restored, and the function
#    returns with "moveq pc,lr" when bit 0 of lr is clear, else "bx lr".
311$code.=<<___;
312 tst $Ktbl,#1
313 beq .L16_79
314 bic $Ktbl,$Ktbl,#1
315
316 ldr $Tlo,[sp,#$Boff+0]
317 ldr $Thi,[sp,#$Boff+4]
318 ldr $t0, [$ctx,#$Aoff+$lo]
319 ldr $t1, [$ctx,#$Aoff+$hi]
320 ldr $t2, [$ctx,#$Boff+$lo]
321 ldr $t3, [$ctx,#$Boff+$hi]
322 adds $t0,$Alo,$t0
323 adc $t1,$Ahi,$t1
324 adds $t2,$Tlo,$t2
325 adc $t3,$Thi,$t3
326 str $t0, [$ctx,#$Aoff+$lo]
327 str $t1, [$ctx,#$Aoff+$hi]
328 str $t2, [$ctx,#$Boff+$lo]
329 str $t3, [$ctx,#$Boff+$hi]
330
331 ldr $Alo,[sp,#$Coff+0]
332 ldr $Ahi,[sp,#$Coff+4]
333 ldr $Tlo,[sp,#$Doff+0]
334 ldr $Thi,[sp,#$Doff+4]
335 ldr $t0, [$ctx,#$Coff+$lo]
336 ldr $t1, [$ctx,#$Coff+$hi]
337 ldr $t2, [$ctx,#$Doff+$lo]
338 ldr $t3, [$ctx,#$Doff+$hi]
339 adds $t0,$Alo,$t0
340 adc $t1,$Ahi,$t1
341 adds $t2,$Tlo,$t2
342 adc $t3,$Thi,$t3
343 str $t0, [$ctx,#$Coff+$lo]
344 str $t1, [$ctx,#$Coff+$hi]
345 str $t2, [$ctx,#$Doff+$lo]
346 str $t3, [$ctx,#$Doff+$hi]
347
348 ldr $Tlo,[sp,#$Foff+0]
349 ldr $Thi,[sp,#$Foff+4]
350 ldr $t0, [$ctx,#$Eoff+$lo]
351 ldr $t1, [$ctx,#$Eoff+$hi]
352 ldr $t2, [$ctx,#$Foff+$lo]
353 ldr $t3, [$ctx,#$Foff+$hi]
354 adds $Elo,$Elo,$t0
355 adc $Ehi,$Ehi,$t1
356 adds $t2,$Tlo,$t2
357 adc $t3,$Thi,$t3
358 str $Elo,[$ctx,#$Eoff+$lo]
359 str $Ehi,[$ctx,#$Eoff+$hi]
360 str $t2, [$ctx,#$Foff+$lo]
361 str $t3, [$ctx,#$Foff+$hi]
362
363 ldr $Alo,[sp,#$Goff+0]
364 ldr $Ahi,[sp,#$Goff+4]
365 ldr $Tlo,[sp,#$Hoff+0]
366 ldr $Thi,[sp,#$Hoff+4]
367 ldr $t0, [$ctx,#$Goff+$lo]
368 ldr $t1, [$ctx,#$Goff+$hi]
369 ldr $t2, [$ctx,#$Hoff+$lo]
370 ldr $t3, [$ctx,#$Hoff+$hi]
371 adds $t0,$Alo,$t0
372 adc $t1,$Ahi,$t1
373 adds $t2,$Tlo,$t2
374 adc $t3,$Thi,$t3
375 str $t0, [$ctx,#$Goff+$lo]
376 str $t1, [$ctx,#$Goff+$hi]
377 str $t2, [$ctx,#$Hoff+$lo]
378 str $t3, [$ctx,#$Hoff+$hi]
379
380 add sp,sp,#640
381 sub $Ktbl,$Ktbl,#640
382
383 teq $inp,$len
384 bne .Loop
385
386 add sp,sp,#8*9 @ destroy frame
387 ldmia sp!,{r4-r12,lr}
388 tst lr,#1
389 moveq pc,lr @ be binary compatible with V4, yet
390 bx lr @ interoperable with Thumb ISA:-)
391.size sha512_block_data_order,.-sha512_block_data_order
392.asciz "SHA512 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
393___
394
# Replace every `...` span in the template with the value of the Perl
# expression it contains (the frame-offset arithmetic such as
# `64+8*(16-1)`), then emit the finished assembly.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
# Output is buffered, so a write error (full disk, closed pipe, ...) may only
# be reported when the handle is closed.  Treat that as fatal so the build
# does not continue with a truncated assembly file.
close STDOUT or die "error closing STDOUT: $!";