]>
Commit | Line | Data |
---|---|---|
6aa36e8e RS |
1 | #! /usr/bin/env perl |
2 | # Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved. | |
3 | # | |
a598ed0d | 4 | # Licensed under the Apache License 2.0 (the "License"). You may not use |
6aa36e8e RS |
5 | # this file except in compliance with the License. You can obtain a copy |
6 | # in the file LICENSE in the source distribution or at | |
7 | # https://www.openssl.org/source/license.html | |
8 | ||
a9c32ace AP |
9 | |
10 | # ==================================================================== | |
e3713c36 | 11 | # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL |
f0f61f6d AP |
12 | # project. The module is, however, dual licensed under OpenSSL and |
13 | # CRYPTOGAMS licenses depending on where you obtain it. For further | |
14 | # details see http://www.openssl.org/~appro/cryptogams/. | |
a9c32ace AP |
15 | # ==================================================================== |
16 | ||
4dca00ce | 17 | # I let hardware handle unaligned input(*), except on page boundaries |
a9c32ace | 18 | # (see below for details). Otherwise straightforward implementation |
5dc52b91 | 19 | # with X vector in register bank. |
4dca00ce AP |
20 | # |
21 | # (*) this means that this module is inappropriate for PPC403? Does | |
22 | # anybody know if pre-POWER3 can sustain unaligned load? | |
a9c32ace | 23 | |
4dca00ce AP |
24 | # -m64 -m32 |
25 | # ---------------------------------- | |
26 | # PPC970,gcc-4.0.0 +76% +59% | |
399f94bf | 27 | # Power6,xlc-7 +68% +33% |
a9c32ace | 28 | |
# Command line: <flavour> [<output>].
#
# The flavour string must contain "64" or "32"; that choice fixes the
# pointer size, the link-register save offset and the pointer-sized
# compare/store/load mnemonics that the code templates interpolate.
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$UCMP	="cmpld";
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$UCMP	="cmplw";
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
} else { die "nonsense $flavour"; }

# Define endianness based on flavour
# i.e.: linux64le
# (non-zero, namely $SIZE_T, when the flavour ends in "le"; 0 otherwise)
$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

# Look for ppc-xlate.pl next to this script first, then in ../../perlasm/
# relative to the script's directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed from here on is piped through the translator.  The
# optional second argument is appended to its command line unchanged.
# The check uses low-precedence "or": with "||" the die was bound to the
# (always true) command string, so a failed open was never reported.
$output = shift;
$output = "" unless defined $output;
open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";

# Stack frame size and the offset of the scratch area inside it; both are
# interpolated into the entry-point template.
$FRAME=24*$SIZE_T+64;
$LOCALS=6*$SIZE_T;
a9c32ace AP |
60 | |
# Register allocation used by all code templates.
#
#   $K               current round constant
#   $sp              stack pointer
#   $ctx,$inp,$num   arguments of sha1_block_data_order
#   $t0,$t1          scratch registers for the round functions
#   $A..$E,$T        the five working variables plus one spare; @V lists
#                    them in the order handed to the round generators
#   @X               sixteen registers holding the message schedule
($K, $sp, $toc, $ctx, $inp, $num) = map { "r$_" } 0 .. 5;
($t1, $t0) = ("r6", "r15");

($A, $B, $C, $D, $E, $T) = map { "r$_" } 7 .. 12;
@V = ($A, $B, $C, $D, $E, $T);

@X = map { "r$_" } 16 .. 31;
80 | ||
3f9562a6 AP |
# loadbe($dst, $src, $temp_reg)
#
# Appends to $code the instructions that load the 32-bit word at $src
# into $dst as a big-endian value.  For big-endian flavours that is a
# plain lwz.  For little-endian flavours the word is loaded into
# $temp_reg and its four bytes are reversed into $dst with one rotate
# and two rotate-and-insert instructions.
sub loadbe {
    my ($dst, $src, $temp_reg) = @_;

    if ($LITTLE_ENDIAN) {
        $code .= <<___;
	lwz $temp_reg,$src
	rotlwi $dst,$temp_reg,8
	rlwimi $dst,$temp_reg,24,0,7
	rlwimi $dst,$temp_reg,24,16,23
___
    }
    else {
        $code .= <<___;
	lwz $dst,$src
___
    }
}
93 | ||
a9c32ace AP |
# BODY_00_19($i, $a, $b, $c, $d, $e, $f)
#
# Appends the code for round $i of the first group to $code.  $a..$e
# name the registers holding the working variables, $f the register
# that receives the round result:
#
#     $f = $K + $e + rotl($a,5) + X[$i] + (($b & $c) | (~$b & $d))
#
# $e is overwritten with rotl($a,5) along the way and $b is rotated left
# by 30 in place.
#
# Rounds 0..14 also fetch the next input word (and word 0 in round 0)
# through loadbe.  From round 15 on, X[($i+1)%16] is instead updated in
# place to rotl(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1), indices modulo 16.
sub BODY_00_19 {
    my ($i, $a, $b, $c, $d, $e, $f) = @_;
    my $j  = $i + 1;
    my $xi = $X[$i % 16];

    if ($i < 15) {
        # $f still holds a value nobody needs, so it can double as the
        # byte-swap scratch register for loadbe.
        loadbe("@X[$i]", "`$i*4`($inp)", $f) if ($i == 0);
        loadbe("@X[$j]", "`$j*4`($inp)", $f);

        $code .= <<___;
	add $f,$K,$e
	rotlwi $e,$a,5
	add $f,$f,$xi
	and $t0,$c,$b
	add $f,$f,$e
	andc $t1,$d,$b
	rotlwi $b,$b,30
	or $t0,$t0,$t1
	add $f,$f,$t0
___
    }
    else {
        my ($xj, $x2, $x8, $x13) = map { $X[($j + $_) % 16] } (0, 2, 8, 13);

        $code .= <<___;
	add $f,$K,$e
	rotlwi $e,$a,5
	xor $xj,$xj,$x2
	add $f,$f,$xi
	and $t0,$c,$b
	xor $xj,$xj,$x8
	add $f,$f,$e
	andc $t1,$d,$b
	rotlwi $b,$b,30
	or $t0,$t0,$t1
	xor $xj,$xj,$x13
	add $f,$f,$t0
	rotlwi $xj,$xj,1
___
    }
}
129 | ||
# BODY_20_39($i, $a, $b, $c, $d, $e, $f)
#
# Appends the code for a parity round to $code (the caller uses it for
# rounds 20..39 and 60..79):
#
#     $f = $K + $e + rotl($a,5) + X[$i] + ($b ^ $c ^ $d)
#
# $e is overwritten with rotl($a,5) and $b is rotated left by 30 in place.
#
# For $i < 79 the schedule word X[($i+1)%16] is updated in place as well.
# Round 79 has no further schedule word to compute; its slots are used
# to reload the five context words from $ctx into r16..r20.  Any $i
# above 79 emits nothing.
sub BODY_20_39 {
    my ($i, $a, $b, $c, $d, $e, $f) = @_;
    my $xi = $X[$i % 16];

    if ($i < 79) {
        my $j = $i + 1;
        my ($xj, $x2, $x8, $x13) = map { $X[($j + $_) % 16] } (0, 2, 8, 13);

        $code .= <<___;
	add $f,$K,$e
	xor $t0,$b,$d
	rotlwi $e,$a,5
	xor $xj,$xj,$x2
	add $f,$f,$xi
	xor $t0,$t0,$c
	xor $xj,$xj,$x8
	add $f,$f,$t0
	rotlwi $b,$b,30
	xor $xj,$xj,$x13
	add $f,$f,$e
	rotlwi $xj,$xj,1
___
    }
    elsif ($i == 79) {
        $code .= <<___;
	add $f,$K,$e
	xor $t0,$b,$d
	rotlwi $e,$a,5
	lwz r16,0($ctx)
	add $f,$f,$xi
	xor $t0,$t0,$c
	lwz r17,4($ctx)
	add $f,$f,$t0
	rotlwi $b,$b,30
	lwz r18,8($ctx)
	lwz r19,12($ctx)
	add $f,$f,$e
	lwz r20,16($ctx)
___
    }
}
163 | ||
# BODY_40_59($i, $a, $b, $c, $d, $e, $f)
#
# Appends the code for a majority round to $code:
#
#     $f = $K + $e + rotl($a,5) + X[$i] + (($b & $c) | (($b | $c) & $d))
#
# $e is overwritten with rotl($a,5), $b is rotated left by 30 in place,
# and the schedule word X[($i+1)%16] is updated in place to
# rotl(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1), indices modulo 16.
sub BODY_40_59 {
    my ($i, $a, $b, $c, $d, $e, $f) = @_;
    my $j  = $i + 1;
    my $xi = $X[$i % 16];
    my ($xj, $x2, $x8, $x13) = map { $X[($j + $_) % 16] } (0, 2, 8, 13);

    # Round computation and schedule update are interleaved; the order
    # below is the emission order.
    my @insns = (
        "add $f,$K,$e",
        "rotlwi $e,$a,5",
        "xor $xj,$xj,$x2",
        "add $f,$f,$xi",
        "and $t0,$b,$c",
        "xor $xj,$xj,$x8",
        "add $f,$f,$e",
        "or $t1,$b,$c",
        "rotlwi $b,$b,30",
        "xor $xj,$xj,$x13",
        "and $t1,$t1,$d",
        "or $t0,$t0,$t1",
        "rotlwi $xj,$xj,1",
        "add $f,$f,$t0",
    );

    $code .= join("", map { "\t$_\n" } @insns);
}
184 | ||
# Template for the public entry point .sha1_block_data_order, called with
# the context in $ctx, the input pointer in $inp and the block count in
# $num (see the register assignments).
#
# Prologue: allocates $FRAME bytes of stack, saves r15..r31 at the top of
# the frame and the link register (via r0) at $FRAME+$LRSAVE, then loads
# the five state words from $ctx into $A..$E.
#
# Dispatch on input alignment (low two bits of $inp):
#   Laligned     - word-aligned input: all $num blocks are handed to
#                  Lsha1_block_private in one call (count passed in CTR).
#   Lunaligned   - computes how many whole 64-byte blocks fit before the
#                  next 4096-byte boundary.  If $num does not exceed that,
#                  the input is processed like the aligned case.
#                  Otherwise those blocks are processed directly and $num
#                  is reduced accordingly.
#   Lcross_page  - the next block is copied byte by byte (16 iterations of
#                  4 bytes, r16..r20 as scratch) into the frame at
#                  $LOCALS and hashed from there; the advanced $inp is
#                  parked in the frame slot at $FRAME-$SIZE_T*18 around
#                  the call.  $num is then decremented and control returns
#                  to Lunaligned until it reaches zero.
#   Ldone        - epilogue: restores LR and r15..r31, releases the frame.
#
# NOTE(review): the .long/.byte words after blr look like a traceback
# table - confirm against the ABI before touching them.
185 | $code=<<___; | |
492279f6 | 186 | .machine "any" |
a9c32ace AP |
187 | .text |
188 | ||
c5f17d45 | 189 | .globl .sha1_block_data_order |
a9c32ace | 190 | .align 4 |
c5f17d45 | 191 | .sha1_block_data_order: |
67150340 | 192 | $STU $sp,-$FRAME($sp) |
a9c32ace | 193 | mflr r0 |
a9c32ace AP |
194 | $PUSH r15,`$FRAME-$SIZE_T*17`($sp) |
195 | $PUSH r16,`$FRAME-$SIZE_T*16`($sp) | |
196 | $PUSH r17,`$FRAME-$SIZE_T*15`($sp) | |
197 | $PUSH r18,`$FRAME-$SIZE_T*14`($sp) | |
198 | $PUSH r19,`$FRAME-$SIZE_T*13`($sp) | |
199 | $PUSH r20,`$FRAME-$SIZE_T*12`($sp) | |
200 | $PUSH r21,`$FRAME-$SIZE_T*11`($sp) | |
201 | $PUSH r22,`$FRAME-$SIZE_T*10`($sp) | |
202 | $PUSH r23,`$FRAME-$SIZE_T*9`($sp) | |
203 | $PUSH r24,`$FRAME-$SIZE_T*8`($sp) | |
204 | $PUSH r25,`$FRAME-$SIZE_T*7`($sp) | |
205 | $PUSH r26,`$FRAME-$SIZE_T*6`($sp) | |
206 | $PUSH r27,`$FRAME-$SIZE_T*5`($sp) | |
207 | $PUSH r28,`$FRAME-$SIZE_T*4`($sp) | |
208 | $PUSH r29,`$FRAME-$SIZE_T*3`($sp) | |
209 | $PUSH r30,`$FRAME-$SIZE_T*2`($sp) | |
210 | $PUSH r31,`$FRAME-$SIZE_T*1`($sp) | |
67150340 | 211 | $PUSH r0,`$FRAME+$LRSAVE`($sp) |
a9c32ace AP |
212 | lwz $A,0($ctx) |
213 | lwz $B,4($ctx) | |
214 | lwz $C,8($ctx) | |
215 | lwz $D,12($ctx) | |
216 | lwz $E,16($ctx) | |
217 | andi. r0,$inp,3 | |
218 | bne Lunaligned | |
219 | Laligned: | |
220 | mtctr $num | |
221 | bl Lsha1_block_private | |
67150340 | 222 | b Ldone |
a9c32ace | 223 | |
67150340 AP |
224 | ; PowerPC specification allows an implementation to be ill-behaved |
225 | ; upon unaligned access which crosses page boundary. "Better safe | |
226 | ; than sorry" principle makes me treat it specially. But I don't | |
227 | ; look for particular offending word, but rather for 64-byte input | |
228 | ; block which crosses the boundary. Once found that block is aligned | |
229 | ; and hashed separately... | |
a9c32ace AP |
230 | .align 4 |
231 | Lunaligned: | |
4dca00ce | 232 | subfic $t1,$inp,4096 |
a9c32ace AP |
233 | andi. $t1,$t1,4095 ; distance to closest page boundary |
234 | srwi. $t1,$t1,6 ; t1/=64 | |
235 | beq Lcross_page | |
236 | $UCMP $num,$t1 | |
20b88bb1 | 237 | ble Laligned ; didn't cross the page boundary |
a9c32ace | 238 | mtctr $t1 |
4dca00ce | 239 | subfc $num,$t1,$num |
a9c32ace AP |
240 | bl Lsha1_block_private |
241 | Lcross_page: | |
242 | li $t1,16 | |
243 | mtctr $t1 | |
67150340 | 244 | addi r20,$sp,$LOCALS ; spot within the frame |
a9c32ace AP |
245 | Lmemcpy: |
246 | lbz r16,0($inp) | |
247 | lbz r17,1($inp) | |
248 | lbz r18,2($inp) | |
249 | lbz r19,3($inp) | |
250 | addi $inp,$inp,4 | |
251 | stb r16,0(r20) | |
252 | stb r17,1(r20) | |
253 | stb r18,2(r20) | |
254 | stb r19,3(r20) | |
255 | addi r20,r20,4 | |
256 | bdnz Lmemcpy | |
257 | ||
67150340 | 258 | $PUSH $inp,`$FRAME-$SIZE_T*18`($sp) |
a9c32ace | 259 | li $t1,1 |
67150340 | 260 | addi $inp,$sp,$LOCALS |
a9c32ace AP |
261 | mtctr $t1 |
262 | bl Lsha1_block_private | |
67150340 | 263 | $POP $inp,`$FRAME-$SIZE_T*18`($sp) |
a9c32ace | 264 | addic. $num,$num,-1 |
20b88bb1 | 265 | bne Lunaligned |
67150340 AP |
266 | |
267 | Ldone: | |
268 | $POP r0,`$FRAME+$LRSAVE`($sp) | |
269 | $POP r15,`$FRAME-$SIZE_T*17`($sp) | |
270 | $POP r16,`$FRAME-$SIZE_T*16`($sp) | |
271 | $POP r17,`$FRAME-$SIZE_T*15`($sp) | |
272 | $POP r18,`$FRAME-$SIZE_T*14`($sp) | |
273 | $POP r19,`$FRAME-$SIZE_T*13`($sp) | |
274 | $POP r20,`$FRAME-$SIZE_T*12`($sp) | |
275 | $POP r21,`$FRAME-$SIZE_T*11`($sp) | |
276 | $POP r22,`$FRAME-$SIZE_T*10`($sp) | |
277 | $POP r23,`$FRAME-$SIZE_T*9`($sp) | |
278 | $POP r24,`$FRAME-$SIZE_T*8`($sp) | |
279 | $POP r25,`$FRAME-$SIZE_T*7`($sp) | |
280 | $POP r26,`$FRAME-$SIZE_T*6`($sp) | |
281 | $POP r27,`$FRAME-$SIZE_T*5`($sp) | |
282 | $POP r28,`$FRAME-$SIZE_T*4`($sp) | |
283 | $POP r29,`$FRAME-$SIZE_T*3`($sp) | |
284 | $POP r30,`$FRAME-$SIZE_T*2`($sp) | |
285 | $POP r31,`$FRAME-$SIZE_T*1`($sp) | |
286 | mtlr r0 | |
287 | addi $sp,$sp,$FRAME | |
288 | blr | |
289 | .long 0 | |
290 | .byte 0,12,4,1,0x80,18,3,0 | |
291 | .long 0 | |
a9c32ace AP |
292 | ___ |
293 | ||
# Lsha1_block_private is the internal block function.  It does not follow
# the normal calling convention: on entry the five state words are already
# in $A..$E, $inp points at the first block and the count register holds
# the number of 64-byte blocks to digest.
$code .= <<___;
.align 4
Lsha1_block_private:
___

# One row per group of twenty rounds: the two halves of the round constant
# (loaded into $K with lis/ori), the round number at which the group ends,
# and the generator that emits each of its rounds.
my @stages = (
    [ "0x5a82", "0x7999", 20, \&BODY_00_19 ],    # K_00_19
    [ "0x6ed9", "0xeba1", 40, \&BODY_20_39 ],    # K_20_39
    [ "0x8f1b", "0xbcdc", 60, \&BODY_40_59 ],    # K_40_59
    [ "0xca62", "0xc1d6", 80, \&BODY_20_39 ],    # K_60_79
);

$i = 0;
foreach my $stage (@stages) {
    my ($k_hi, $k_lo, $end, $emit_round) = @$stage;

    $code .= <<___;
	lis $K,$k_hi
	ori $K,$K,$k_lo
___
    while ($i < $end) {
        $emit_round->($i, @V);
        # Rotate the register list so that the register just written
        # becomes the first working variable of the next round.
        unshift(@V, pop(@V));
        $i++;
    }
}

# r16..r20 were reloaded with the previous state by round 79.  Add the new
# working variables, store the sums back to the context, keep them in
# $A..$E for the next block, advance $inp by one block and loop while the
# count register is non-zero.
$code .= <<___;
	add r16,r16,$E
	add r17,r17,$T
	add r18,r18,$A
	add r19,r19,$B
	add r20,r20,$C
	stw r16,0($ctx)
	mr $A,r16
	stw r17,4($ctx)
	mr $B,r17
	stw r18,8($ctx)
	mr $C,r18
	stw r19,12($ctx)
	mr $D,r19
	stw r20,16($ctx)
	mr $E,r20
	addi $inp,$inp,`16*4`
	bdnz Lsha1_block_private
	blr
	.long 0
	.byte 0,12,0x14,0,0,0,0,0
.size .sha1_block_data_order,.-.sha1_block_data_order
___
$code .= <<___;
.asciz "SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
___
a9c32ace AP |
348 | |
# Replace every `expr` left in the templates with the value of the Perl
# expression (the frame offsets and similar constants), then send the
# finished assembly to the translator.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;

# STDOUT is a pipe to the translator: closing it flushes the output and
# waits for the child, so this is where write errors and a failing
# translator show up.  For a non-zero child exit $! is 0 and the status
# is in $?, hence both are reported.
close STDOUT or die "error closing STDOUT: $! (exit status $?)";