]>
Commit | Line | Data |
---|---|---|
a1a382db AP |
1 | #!/usr/bin/env perl |
2 | ||
3 | # ==================================================================== | |
4 | # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | |
5 | # project. The module is, however, dual licensed under OpenSSL and | |
6 | # CRYPTOGAMS licenses depending on where you obtain it. For further | |
7 | # details see http://www.openssl.org/~appro/cryptogams/. | |
8 | # ==================================================================== | |
9 | ||
399f94bf | 10 | # SHA256 block procedure for ARMv4. May 2007. |
a1a382db | 11 | |
399f94bf AP |
12 | # Performance is ~2x better than gcc 3.4 generated code and in "abso- |
13 | # lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per | |
14 | # byte. | |
a1a382db | 15 | |
4c7c5ff6 AP |
# The optional first command-line argument names the assembly file to
# generate.  Without it the generated code goes to the inherited STDOUT.
$output=shift;
if (defined($output) && length($output)) {
    # Three-argument open keeps odd characters in the name from being
    # read as a mode, and a failure aborts the run instead of silently
    # writing the assembly to wherever STDOUT happened to point.
    open(STDOUT,'>',$output) or die "can't open $output: $!";
}
18 | ||
a1a382db AP |
# Register allocation used by the code templates in this file.
# Two scratch names deliberately share a register with an argument:
# $t0 with $ctx (r0) and $t1 with $len (r2).
($ctx,$inp,$len,$T1)	= ("r0","r1","r2","r3");
($t0,$t1,$t2)		= ("r0","r2","r12");

# The eight working variables a..h live in r4-r11; @V lists them in that
# order and is rotated by the round loops.
($A,$B,$C,$D,$E,$F,$G,$H) = map { "r$_" } (4..11);
@V	= ($A,$B,$C,$D,$E,$F,$G,$H);

# Pointer that walks the K256 constant table.
$Ktbl	= "r14";

# Rotate/shift amounts.  All three entries of @Sigma0/@Sigma1 are used as
# rotate counts; for @sigma0/@sigma1 the first two are rotate counts and
# the third is a logical-shift-right count.
@Sigma0	= ( 2,13,22);
@Sigma1	= ( 6,11,25);
@sigma0	= ( 7,18, 3);
@sigma1	= (17,19,10);
39 | ||
# Append the instructions for one SHA-256 round to the global $code.
#
# Arguments: the round number $i, followed by the names of the eight
# registers currently playing the roles a..h.
#
# For $i below 16 the message word is first assembled in $T1 from four
# bytes at $inp (the byte at offset 0 ends up in bits 31..24) and $inp is
# advanced by 4.  Round 15 additionally emits a store of the advanced $inp
# to [sp,#17*4]; for every other round that backtick expression evaluates
# to an empty string when the placeholders are expanded later.  For larger
# $i the caller must already have left the word in $T1.
#
# The common part stores $T1 into stack slot $i%16, adds Sigma1(e),
# Ch(e,f,g), h and the next K256 constant to it, rebuilds h as
# Sigma0(a)+Maj(a,b,c)+T1 and adds T1 to d.
sub BODY_00_15 {
    my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;

    if ($i<16) {
	$code.=<<___;
	ldrb	$T1,[$inp,#3]			@ $i
	ldrb	$t2,[$inp,#2]
	ldrb	$t1,[$inp,#1]
	ldrb	$t0,[$inp],#4
	orr	$T1,$T1,$t2,lsl#8
	orr	$T1,$T1,$t1,lsl#16
	orr	$T1,$T1,$t0,lsl#24
	`"str	$inp,[sp,#17*4]"	if ($i==15)`
___
    }

    $code.=<<___;
	ldr	$t2,[$Ktbl],#4			@ *K256++
	str	$T1,[sp,#`$i%16`*4]
	mov	$t0,$e,ror#$Sigma1[0]
	eor	$t0,$t0,$e,ror#$Sigma1[1]
	eor	$t0,$t0,$e,ror#$Sigma1[2]	@ Sigma1(e)
	add	$T1,$T1,$t0
	eor	$t1,$f,$g
	and	$t1,$t1,$e
	eor	$t1,$t1,$g			@ Ch(e,f,g)
	add	$T1,$T1,$t1
	add	$T1,$T1,$h
	add	$T1,$T1,$t2
	mov	$h,$a,ror#$Sigma0[0]
	eor	$h,$h,$a,ror#$Sigma0[1]
	eor	$h,$h,$a,ror#$Sigma0[2]	@ Sigma0(a)
	orr	$t0,$a,$b
	and	$t0,$t0,$c
	and	$t1,$a,$b
	orr	$t0,$t0,$t1			@ Maj(a,b,c)
	add	$h,$h,$t0
	add	$d,$d,$T1
	add	$h,$h,$T1
___
}
78 | ||
# Append the instructions for one round with $i of 16 or more to the
# global $code.
#
# Takes the same arguments as BODY_00_15: the round number $i followed by
# the register names for a..h.
#
# The message word is built in $T1 from the 16-word window kept on the
# stack: X[i] + sigma0(X[i+1]) + sigma1(X[i+14]) + X[i+9], with every index
# taken modulo 16.  $inp is used as a scratch register here.  The round
# itself is then emitted by BODY_00_15, which also writes $T1 back to
# slot $i%16.
sub BODY_16_XX {
    my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;

    $code.=<<___;
	ldr	$t1,[sp,#`($i+1)%16`*4]	@ $i
	ldr	$t2,[sp,#`($i+14)%16`*4]
	ldr	$T1,[sp,#`($i+0)%16`*4]
	ldr	$inp,[sp,#`($i+9)%16`*4]
	mov	$t0,$t1,ror#$sigma0[0]
	eor	$t0,$t0,$t1,ror#$sigma0[1]
	eor	$t0,$t0,$t1,lsr#$sigma0[2]	@ sigma0(X[i+1])
	mov	$t1,$t2,ror#$sigma1[0]
	eor	$t1,$t1,$t2,ror#$sigma1[1]
	eor	$t1,$t1,$t2,lsr#$sigma1[2]	@ sigma1(X[i+14])
	add	$T1,$T1,$t0
	add	$T1,$T1,$t1
	add	$T1,$T1,$inp
___
    return BODY_00_15(@_);
}
99 | ||
# Start the module text: the K256 constant table, immediately followed by
# the entry sequence of sha256_block_data_order.  The entry code finds the
# table by subtracting its size (256 bytes) from the function's own
# address, pushes ctx, inp and the end-of-input pointer together with the
# callee-saved registers, loads the eight state words and reserves 16
# words of stack for the message window X[].
$code=<<___;
.text
.code	32

.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256

.global	sha256_block_data_order
.type	sha256_block_data_order,%function
sha256_block_data_order:
	sub	r3,pc,#8		@ sha256_block_data_order
	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
	stmdb	sp!,{$ctx,$inp,$len,r4-r12,lr}
	ldmia	$ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
	sub	$Ktbl,r3,#256		@ K256
	sub	sp,sp,#16*4		@ alloca(X[16])
.Loop:
___

# Rounds 0..15 are emitted once.  After each round the register list is
# rotated by one position so that the next round sees the renamed a..h.
foreach my $round (0..15) {
    BODY_00_15($round,@V);
    unshift @V, pop @V;
}

# Rounds 16..31 form the body of a run-time loop; the emitted code repeats
# it until the low byte of the K256 word loaded last equals 0xf2.
$code.=".Lrounds_16_xx:\n";
foreach my $round (16..31) {
    BODY_16_XX($round,@V);
    unshift @V, pop @V;
}

# Epilogue of the block loop: add the working variables to the state kept
# in ctx, rewind the K256 pointer, continue with the next block until inp
# reaches the saved end pointer, then drop the frame and return.
$code.=<<___;
	and	$t2,$t2,#0xff
	cmp	$t2,#0xf2
	bne	.Lrounds_16_xx

	ldr	$T1,[sp,#16*4]		@ pull ctx
	ldr	$t0,[$T1,#0]
	ldr	$t1,[$T1,#4]
	ldr	$t2,[$T1,#8]
	add	$A,$A,$t0
	ldr	$t0,[$T1,#12]
	add	$B,$B,$t1
	ldr	$t1,[$T1,#16]
	add	$C,$C,$t2
	ldr	$t2,[$T1,#20]
	add	$D,$D,$t0
	ldr	$t0,[$T1,#24]
	add	$E,$E,$t1
	ldr	$t1,[$T1,#28]
	add	$F,$F,$t2
	ldr	$inp,[sp,#17*4]		@ pull inp
	ldr	$t2,[sp,#18*4]		@ pull inp+len
	add	$G,$G,$t0
	add	$H,$H,$t1
	stmia	$T1,{$A,$B,$C,$D,$E,$F,$G,$H}
	cmp	$inp,$t2
	sub	$Ktbl,$Ktbl,#256	@ rewind Ktbl
	bne	.Loop

	add	sp,sp,#`16+3`*4	@ destroy frame
	ldmia	sp!,{r4-r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
.size   sha256_block_data_order,.-sha256_block_data_order
.asciz  "SHA256 block transform for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
___
176 | ||
# Post-process the generated text and write it out.
#
# Every `...` span is replaced by the result of evaluating its contents as
# a Perl expression; this is how the stack offsets and the conditional
# store in the round templates are resolved.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
# Emit "bx lr" as a literal instruction word.
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4

# Buffered write errors (disk full, closed pipe) may only surface when the
# handle is flushed, so check both calls and fail loudly rather than leave
# a truncated assembly file behind with a zero exit status.
print($code) or die "error writing output: $!";
close(STDOUT) or die "error closing STDOUT: $!";	# enforce flush