3 # ====================================================================
4 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5 # project. Rights for redistribution and usage in source and binary
6 # forms are granted according to the OpenSSL license.
7 # ====================================================================
9 # I let hardware handle unaligned input, except on page boundaries
10 # (see below for details). Otherwise a straightforward implementation
11 # with the X vector in the register bank. The module is big-endian [which
12 # is not a big deal, as there are no little-endian targets left around].
15 # --------------------------
20 if ($output =~ /64\.s/) {
27 } elsif ($output =~ /32\.s/) {
34 } else { die "nonsense $output"; }
# Route the generated output through the perlasm translator: unless a
# further argument is supplied (the `defined shift` test), STDOUT is
# reopened as a pipe into ../perlasm/ppc-xlate.pl, which is started with
# the running Perl interpreter ($^X) and given $output as its argument.
# Dies with the OS error ($!) if the pipe cannot be opened.
# NOTE(review): $output is interpolated into a shell command line by this
# two-argument pipe open; confirm it only ever comes from the build system.
36 ( defined shift || open STDOUT
,"| $^X ../perlasm/ppc-xlate.pl $output" ) ||
37 die "can't call ../perlasm/ppc-xlate.pl: $!";
# @V is the list of six working variables handed to the BODY_* round
# generators (the variables themselves are defined outside this excerpt).
# @X names the sixteen registers r16..r31; the round generators index it
# modulo 16.
57 @V=($A,$B,$C,$D,$E,$T);
58 @X=("r16","r17","r18","r19","r20","r21","r22","r23",
59 "r24","r25","r26","r27","r28","r29","r30","r31");
62 my ($i,$a,$b,$c,$d,$e,$f)=@_;
64 $code.=<<___
if ($i==0);
65 lwz
@X[$i],`$i*4`($inp)
67 $code.=<<___
if ($i<15);
68 lwz
@X[$j],`$j*4`($inp)
79 $code.=<<___
if ($i>=15);
82 xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
85 xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
90 xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
92 rotlwi
@X[$j%16],@X[$j%16],1
97 my ($i,$a,$b,$c,$d,$e,$f)=@_;
99 $code.=<<___
if ($i<79);
102 xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
105 xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
109 xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
111 rotlwi
@X[$j%16],@X[$j%16],1
113 $code.=<<___
if ($i==79);
131 my ($i,$a,$b,$c,$d,$e,$f)=@_;
136 xor @X[$j%16],@X[$j%16],@X[($j+2)%16]
139 xor @X[$j%16],@X[$j%16],@X[($j+8)%16]
143 xor @X[$j%16],@X[$j%16],@X[($j+13)%16]
146 rotlwi
@X[$j%16],@X[$j%16],1
155 .globl
.sha1_block_asm_data_order
157 .sha1_block_asm_data_order
:
159 $STU $sp,`-($FRAME+64+$RZONE)`($sp)
160 $PUSH r0
,`$FRAME-$SIZE_T*18`($sp)
161 $PUSH r15
,`$FRAME-$SIZE_T*17`($sp)
162 $PUSH r16
,`$FRAME-$SIZE_T*16`($sp)
163 $PUSH r17
,`$FRAME-$SIZE_T*15`($sp)
164 $PUSH r18
,`$FRAME-$SIZE_T*14`($sp)
165 $PUSH r19
,`$FRAME-$SIZE_T*13`($sp)
166 $PUSH r20
,`$FRAME-$SIZE_T*12`($sp)
167 $PUSH r21
,`$FRAME-$SIZE_T*11`($sp)
168 $PUSH r22
,`$FRAME-$SIZE_T*10`($sp)
169 $PUSH r23
,`$FRAME-$SIZE_T*9`($sp)
170 $PUSH r24
,`$FRAME-$SIZE_T*8`($sp)
171 $PUSH r25
,`$FRAME-$SIZE_T*7`($sp)
172 $PUSH r26
,`$FRAME-$SIZE_T*6`($sp)
173 $PUSH r27
,`$FRAME-$SIZE_T*5`($sp)
174 $PUSH r28
,`$FRAME-$SIZE_T*4`($sp)
175 $PUSH r29
,`$FRAME-$SIZE_T*3`($sp)
176 $PUSH r30
,`$FRAME-$SIZE_T*2`($sp)
177 $PUSH r31
,`$FRAME-$SIZE_T*1`($sp)
187 bl Lsha1_block_private
189 $POP r0
,`$FRAME-$SIZE_T*18`($sp)
190 $POP r15
,`$FRAME-$SIZE_T*17`($sp)
191 $POP r16
,`$FRAME-$SIZE_T*16`($sp)
192 $POP r17
,`$FRAME-$SIZE_T*15`($sp)
193 $POP r18
,`$FRAME-$SIZE_T*14`($sp)
194 $POP r19
,`$FRAME-$SIZE_T*13`($sp)
195 $POP r20
,`$FRAME-$SIZE_T*12`($sp)
196 $POP r21
,`$FRAME-$SIZE_T*11`($sp)
197 $POP r22
,`$FRAME-$SIZE_T*10`($sp)
198 $POP r23
,`$FRAME-$SIZE_T*9`($sp)
199 $POP r24
,`$FRAME-$SIZE_T*8`($sp)
200 $POP r25
,`$FRAME-$SIZE_T*7`($sp)
201 $POP r26
,`$FRAME-$SIZE_T*6`($sp)
202 $POP r27
,`$FRAME-$SIZE_T*5`($sp)
203 $POP r28
,`$FRAME-$SIZE_T*4`($sp)
204 $POP r29
,`$FRAME-$SIZE_T*3`($sp)
205 $POP r30
,`$FRAME-$SIZE_T*2`($sp)
206 $POP r31
,`$FRAME-$SIZE_T*1`($sp)
208 addi
$sp,$sp,`$FRAME+64+$RZONE`
212 # PowerPC specification allows an implementation to be ill-behaved
213 # upon unaligned access which crosses a page boundary. The "better safe
214 # than sorry" principle makes me treat it specially. I don't look for
215 # the particular offending word, but rather for the 64-byte input
216 # block which crosses the boundary. Once found, that block is aligned
217 # and hashed separately...
223 andi
. $t1,$t1,4095 ; distance to closest page boundary
224 srwi
. $t1,$t1,6 ; t1
/=64
227 ble
- Laligned
; didn
't cross the page boundary
230 bl Lsha1_block_private
234 addi r20,$sp,$FRAME ; spot below the frame
248 $PUSH $inp,`$FRAME-$SIZE_T*19`($sp)
252 bl Lsha1_block_private
253 $POP $inp,`$FRAME-$SIZE_T*19`($sp)
259 # This is a private block function, which uses a tailored calling
260 # interface: upon entry SHA_CTX is pre-loaded into given
261 # registers and the counter register contains the number of chunks to
267 $code.=<<___; # load K_00_19
271 for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } # rounds 0..19; after each round the last element of @V is moved to the front
272 $code.=<<___; # load K_20_39
276 for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } # rounds 20..39; $i carries over from the previous loop
277 $code.=<<___; # load K_40_59
281 for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } # rounds 40..59
282 $code.=<<___; # load K_60_79
286 for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } # rounds 60..79 reuse the BODY_20_39 generator
303 addi $inp,$inp,`16*4`
304 bdnz- Lsha1_block_private
308 $code =~ s/\`([^\`]*)\`/eval $1/gem; # replace every `...` span in $code with the result of evaluating its contents as Perl