3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
10 # SHA512 block procedure for ARMv4. September 2007.
12 # This code is ~4.5 (four and a half) times faster than code generated
13 # by gcc 3.4 and it spends ~72 clock cycles per byte.
15 # This module currently has dependency on byte order, namely *dword*
16 # order in ctx->h[0-7]. I have to think of a way to reliably detect
17 # "endianness" [and flip below two constants] or arrange given dword
19 $lo=0; # this denotes little-endian platform.
35 ############ r13 is stack pointer
37 ############ r15 is program counter
52 ldr
$t2,[sp
,#$Hoff+0] @ h.lo
53 ldr
$t3,[sp
,#$Hoff+4] @ h.hi
54 @ Sigma1
(x
) (ROTR
((x
),14) ^ ROTR
((x
),18) ^ ROTR
((x
),41))
55 @ LO lo
>>14^hi
<<18 ^ lo
>>18^hi
<<14 ^ hi
>>9^lo
<<23
56 @ HI hi
>>14^lo
<<18 ^ hi
>>18^lo
<<14 ^ lo
>>9^hi
<<23
59 eor
$t0,$t0,$Ehi,lsl
#18
60 eor
$t1,$t1,$Elo,lsl
#18
61 eor
$t0,$t0,$Elo,lsr
#18
62 eor
$t1,$t1,$Ehi,lsr
#18
63 eor
$t0,$t0,$Ehi,lsl
#14
64 eor
$t1,$t1,$Elo,lsl
#14
65 eor
$t0,$t0,$Ehi,lsr
#9
66 eor
$t1,$t1,$Elo,lsr
#9
67 eor
$t0,$t0,$Elo,lsl
#23
68 eor
$t1,$t1,$Ehi,lsl
#23 @ Sigma1(e)
70 adc
$Thi,$Thi,$t1 @ T
+= Sigma1
(e
)
72 adc
$Thi,$Thi,$t3 @ T
+= h
74 ldr
$t0,[sp
,#$Foff+0] @ f.lo
75 ldr
$t1,[sp
,#$Foff+4] @ f.hi
76 ldr
$t2,[sp
,#$Goff+0] @ g.lo
77 ldr
$t3,[sp
,#$Goff+4] @ g.hi
78 str
$Elo,[sp
,#$Eoff+0]
79 str
$Ehi,[sp
,#$Eoff+4]
80 str
$Alo,[sp
,#$Aoff+0]
81 str
$Ahi,[sp
,#$Aoff+4]
88 eor
$t1,$t1,$t3 @ Ch
(e
,f
,g
)
90 ldr
$t2,[$Ktbl,#4] @ K[i].lo
91 ldr
$t3,[$Ktbl,#0] @ K[i].hi
92 ldr
$Elo,[sp
,#$Doff+0] @ d.lo
93 ldr
$Ehi,[sp
,#$Doff+4] @ d.hi
96 adc
$Thi,$Thi,$t1 @ T
+= Ch
(e
,f
,g
)
98 adc
$Thi,$Thi,$t3 @ T
+= K
[i
]
100 adc
$Ehi,$Ehi,$Thi @ d
+= T
106 ldr
$t2,[sp
,#$Boff+0] @ b.lo
107 ldr
$t3,[sp
,#$Coff+0] @ c.lo
108 @ Sigma0
(x
) (ROTR
((x
),28) ^ ROTR
((x
),34) ^ ROTR
((x
),39))
109 @ LO lo
>>28^hi
<<4 ^ hi
>>2^lo
<<30 ^ hi
>>7^lo
<<25
110 @ HI hi
>>28^lo
<<4 ^ lo
>>2^hi
<<30 ^ lo
>>7^hi
<<25
113 eor
$t0,$t0,$Ahi,lsl
#4
114 eor
$t1,$t1,$Alo,lsl
#4
115 eor
$t0,$t0,$Ahi,lsr
#2
116 eor
$t1,$t1,$Alo,lsr
#2
117 eor
$t0,$t0,$Alo,lsl
#30
118 eor
$t1,$t1,$Ahi,lsl
#30
119 eor
$t0,$t0,$Ahi,lsr
#7
120 eor
$t1,$t1,$Alo,lsr
#7
121 eor
$t0,$t0,$Alo,lsl
#25
122 eor
$t1,$t1,$Ahi,lsl
#25 @ Sigma0(a)
124 adc
$Thi,$Thi,$t1 @ T
+= Sigma0
(a
)
128 ldr
$t1,[sp
,#$Boff+4] @ b.hi
129 ldr
$t2,[sp
,#$Coff+4] @ c.hi
131 orr
$Alo,$Alo,$t0 @ Maj
(a
,b
,c
).lo
135 orr
$Ahi,$Ahi,$t3 @ Maj
(a
,b
,c
).hi
137 adc
$Ahi,$Ahi,$Thi @ h
+= T
@ K512 round-constant table: 80 64-bit constants, each emitted as a pair
@ of 32-bit words with the high word first (the round code reads K[i].hi
@ at offset 0 and K[i].lo at offset 4). Total size is 80*8 = 640 bytes,
@ which is the distance the entry code subtracts to locate the table.
149 .word
0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd
150 .word
0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc
151 .word
0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019
152 .word
0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118
153 .word
0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe
154 .word
0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2
155 .word
0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1
156 .word
0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694
157 .word
0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3
158 .word
0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65
159 .word
0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483
160 .word
0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5
161 .word
0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210
162 .word
0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4
163 .word
0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725
164 .word
0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70
165 .word
0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926
166 .word
0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df
167 .word
0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8
168 .word
0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b
169 .word
0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001
170 .word
0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30
171 .word
0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910
172 .word
0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8
173 .word
0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53
174 .word
0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8
175 .word
0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb
176 .word
0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3
177 .word
0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60
178 .word
0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec
179 .word
0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9
180 .word
0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b
181 .word
0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207
182 .word
0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178
183 .word
0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6
184 .word
0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b
185 .word
0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493
186 .word
0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c
187 .word
0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a
188 .word
0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817
@ sha512_block_data_order: exported entry point of the SHA-512 block
@ routine. Register roles are held in Perl variables ($ctx, $inp, $len,
@ $Ktbl, $Tlo/$Thi, $Alo/$Ahi, $Elo/$Ehi, $t0-$t3) whose assignments are
@ not part of this excerpt.
@ NOTE(review): labels, loop branches and many arithmetic instructions of
@ the routine are missing here; the comments describe visible lines only.
191 .global sha512_block_data_order
192 .type sha512_block_data_order
,%function
193 sha512_block_data_order
:
@ In ARM state pc reads as the current instruction address + 8, so r3
@ receives the address of this entry point.
194 sub r3
,pc
,#8 @ sha512_block_data_order
195 add
$len,$inp,$len,lsl
#7 @ len to point at the end of inp
@ Save r4-r12 and lr; the epilogue restores the same register set.
196 stmdb sp
!,{r4
-r12
,lr
}
@ 640 = 80 constants * 8 bytes; assumes the table ends right at the
@ entry point -- TODO confirm nothing else is emitted in between.
197 sub $Ktbl,r3
,#640 @ K512
@ Prime the working state from the context: e and a are loaded into
@ registers, while b, c, d, f, g and h are copied into the stack frame.
@ $lo/$hi select which 32-bit word of each 64-bit context entry is the
@ low/high half.
200 ldr
$Elo,[$ctx,#$Eoff+$lo]
201 ldr
$Ehi,[$ctx,#$Eoff+$hi]
202 ldr
$t0, [$ctx,#$Goff+$lo]
203 ldr
$t1, [$ctx,#$Goff+$hi]
204 ldr
$t2, [$ctx,#$Hoff+$lo]
205 ldr
$t3, [$ctx,#$Hoff+$hi]
207 str
$t0, [sp
,#$Goff+0]
208 str
$t1, [sp
,#$Goff+4]
209 str
$t2, [sp
,#$Hoff+0]
210 str
$t3, [sp
,#$Hoff+4]
211 ldr
$Alo,[$ctx,#$Aoff+$lo]
212 ldr
$Ahi,[$ctx,#$Aoff+$hi]
213 ldr
$Tlo,[$ctx,#$Boff+$lo]
214 ldr
$Thi,[$ctx,#$Boff+$hi]
215 ldr
$t0, [$ctx,#$Coff+$lo]
216 ldr
$t1, [$ctx,#$Coff+$hi]
217 ldr
$t2, [$ctx,#$Doff+$lo]
218 ldr
$t3, [$ctx,#$Doff+$hi]
219 str
$Tlo,[sp
,#$Boff+0]
220 str
$Thi,[sp
,#$Boff+4]
221 str
$t0, [sp
,#$Coff+0]
222 str
$t1, [sp
,#$Coff+4]
223 str
$t2, [sp
,#$Doff+0]
224 str
$t3, [sp
,#$Doff+4]
225 ldr
$Tlo,[$ctx,#$Foff+$lo]
226 ldr
$Thi,[$ctx,#$Foff+$hi]
227 str
$Tlo,[sp
,#$Foff+0]
228 str
$Thi,[sp
,#$Foff+4]
@ Merge $t0-$t3, shifted left by 8/16/24 bits, into $Tlo and $Thi.
@ NOTE(review): presumably these are input bytes fetched from $inp being
@ assembled into one 64-bit word; the loads are not in this excerpt.
237 orr
$Tlo,$Tlo,$t0,lsl
#8
239 orr
$Tlo,$Tlo,$t1,lsl
#16
241 orr
$Tlo,$Tlo,$t2,lsl
#24
242 orr
$Thi,$Thi,$t3,lsl
#8
243 orr
$Thi,$Thi,$t0,lsl
#16
244 orr
$Thi,$Thi,$t1,lsl
#24
@ Store the assembled word in the X slot of the frame.
245 str
$Tlo,[sp
,#$Xoff+0]
246 str
$Thi,[sp
,#$Xoff+4]
@ Message-schedule step: fetch the two earlier schedule words that feed
@ sigma0 ($t1:$t0, slot X+8*(16-1)) and sigma1 ($t3:$t2, slot X+8*(16-14)).
@ The backtick expressions are evaluated by the Perl substitution at the
@ end of the file.
255 ldr
$t0,[sp
,#`$Xoff+8*(16-1)`+0]
256 ldr
$t1,[sp
,#`$Xoff+8*(16-1)`+4]
257 ldr
$t2,[sp
,#`$Xoff+8*(16-14)`+0]
258 ldr
$t3,[sp
,#`$Xoff+8*(16-14)`+4]
260 @ sigma0
(x
) (ROTR
((x
),1) ^ ROTR
((x
),8) ^ ((x
)>>7))
261 @ LO lo
>>1^hi
<<31 ^ lo
>>8^hi
<<24 ^ lo
>>7^hi
<<25
262 @ HI hi
>>1^lo
<<31 ^ hi
>>8^lo
<<24 ^ hi
>>7
@ $Thi:$Tlo accumulates sigma0 of the word held in $t1:$t0. The HI half
@ has one term fewer because the last component is a plain shift.
265 eor
$Tlo,$Tlo,$t1,lsl
#31
266 eor
$Thi,$Thi,$t0,lsl
#31
267 eor
$Tlo,$Tlo,$t0,lsr
#8
268 eor
$Thi,$Thi,$t1,lsr
#8
269 eor
$Tlo,$Tlo,$t1,lsl
#24
270 eor
$Thi,$Thi,$t0,lsl
#24
271 eor
$Tlo,$Tlo,$t0,lsr
#7
272 eor
$Thi,$Thi,$t1,lsr
#7
273 eor
$Tlo,$Tlo,$t1,lsl
#25
275 @ sigma1
(x
) (ROTR
((x
),19) ^ ROTR
((x
),61) ^ ((x
)>>6))
276 @ LO lo
>>19^hi
<<13 ^ hi
>>29^lo
<<3 ^ lo
>>6^hi
<<26
277 @ HI hi
>>19^lo
<<13 ^ lo
>>29^hi
<<3 ^ hi
>>6
@ $t1:$t0 accumulates sigma1 of the word held in $t3:$t2.
280 eor
$t0,$t0,$t3,lsl
#13
281 eor
$t1,$t1,$t2,lsl
#13
282 eor
$t0,$t0,$t3,lsr
#29
283 eor
$t1,$t1,$t2,lsr
#29
284 eor
$t0,$t0,$t2,lsl
#3
285 eor
$t1,$t1,$t3,lsl
#3
286 eor
$t0,$t0,$t2,lsr
#6
287 eor
$t1,$t1,$t3,lsr
#6
288 eor
$t0,$t0,$t3,lsl
#26
@ Fetch the schedule word at slot X+8*(16-9).
290 ldr
$t2,[sp
,#`$Xoff+8*(16-9)`+0]
291 ldr
$t3,[sp
,#`$Xoff+8*(16-9)`+4]
@ Fetch the schedule word at slot X+8*16.
295 ldr
$t0,[sp
,#`$Xoff+8*16`+0]
296 ldr
$t1,[sp
,#`$Xoff+8*16`+4]
@ Store $Thi:$Tlo in the X slot of the frame.
301 str
$Tlo,[sp
,#$Xoff+0]
302 str
$Thi,[sp
,#$Xoff+4]
@ End of block: working variables are read from the frame, the matching
@ context words are loaded, and results are stored back into the context,
@ two variables at a time.
@ NOTE(review): presumably the feed-forward addition happens between each
@ load group and store group; those instructions are not in this excerpt.
310 ldr
$Tlo,[sp
,#$Boff+0]
311 ldr
$Thi,[sp
,#$Boff+4]
312 ldr
$t0, [$ctx,#$Aoff+$lo]
313 ldr
$t1, [$ctx,#$Aoff+$hi]
314 ldr
$t2, [$ctx,#$Boff+$lo]
315 ldr
$t3, [$ctx,#$Boff+$hi]
320 str
$t0, [$ctx,#$Aoff+$lo]
321 str
$t1, [$ctx,#$Aoff+$hi]
322 str
$t2, [$ctx,#$Boff+$lo]
323 str
$t3, [$ctx,#$Boff+$hi]
@ c and d.
325 ldr
$Alo,[sp
,#$Coff+0]
326 ldr
$Ahi,[sp
,#$Coff+4]
327 ldr
$Tlo,[sp
,#$Doff+0]
328 ldr
$Thi,[sp
,#$Doff+4]
329 ldr
$t0, [$ctx,#$Coff+$lo]
330 ldr
$t1, [$ctx,#$Coff+$hi]
331 ldr
$t2, [$ctx,#$Doff+$lo]
332 ldr
$t3, [$ctx,#$Doff+$hi]
337 str
$t0, [$ctx,#$Coff+$lo]
338 str
$t1, [$ctx,#$Coff+$hi]
339 str
$t2, [$ctx,#$Doff+$lo]
340 str
$t3, [$ctx,#$Doff+$hi]
@ e and f.
342 ldr
$Tlo,[sp
,#$Foff+0]
343 ldr
$Thi,[sp
,#$Foff+4]
344 ldr
$t0, [$ctx,#$Eoff+$lo]
345 ldr
$t1, [$ctx,#$Eoff+$hi]
346 ldr
$t2, [$ctx,#$Foff+$lo]
347 ldr
$t3, [$ctx,#$Foff+$hi]
@ e is stored straight from $Elo/$Ehi, unlike the other variables, which
@ are stored from $t0-$t3.
352 str
$Elo,[$ctx,#$Eoff+$lo]
353 str
$Ehi,[$ctx,#$Eoff+$hi]
354 str
$t2, [$ctx,#$Foff+$lo]
355 str
$t3, [$ctx,#$Foff+$hi]
@ g and h.
357 ldr
$Alo,[sp
,#$Goff+0]
358 ldr
$Ahi,[sp
,#$Goff+4]
359 ldr
$Tlo,[sp
,#$Hoff+0]
360 ldr
$Thi,[sp
,#$Hoff+4]
361 ldr
$t0, [$ctx,#$Goff+$lo]
362 ldr
$t1, [$ctx,#$Goff+$hi]
363 ldr
$t2, [$ctx,#$Hoff+$lo]
364 ldr
$t3, [$ctx,#$Hoff+$hi]
369 str
$t0, [$ctx,#$Goff+$lo]
370 str
$t1, [$ctx,#$Goff+$hi]
371 str
$t2, [$ctx,#$Hoff+$lo]
372 str
$t3, [$ctx,#$Hoff+$hi]
@ Release 8*9 bytes of stack frame, then restore the registers saved on
@ entry.
380 add sp
,sp
,#8*9 @ destroy frame
381 ldmia sp
!,{r4
-r12
,lr
}
@ Return: moveq pc,lr executes only when the Z flag is set; otherwise
@ bx lr is used. NOTE(review): the flag-setting instruction that selects
@ between the two is not part of this excerpt.
383 moveq pc
,lr @ be binary compatible with V4
, yet
384 bx lr @ interoperable with Thumb ISA
:-)
385 .size sha512_block_data_order
,.-sha512_block_data_order
386 .asciz
"SHA512 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
# Replace every `...` span in $code with the result of eval'ing its
# contents (e.g. the `$Xoff+8*(16-1)` frame offsets), so the text held in
# $code carries plain numbers instead of Perl expressions.
389 $code =~ s/\`([^\`]*)\`/eval $1/gem;