]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/sha/asm/sha1-ppc.pl
Following the license change, modify the boilerplates in crypto/sha/
[thirdparty/openssl.git] / crypto / sha / asm / sha1-ppc.pl
CommitLineData
#! /usr/bin/env perl
# Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# I let hardware handle unaligned input(*), except on page boundaries
# (see below for details). Otherwise straightforward implementation
# with X vector in register bank.
#
# (*) this means that this module is inappropriate for PPC403? Does
#     anybody know if pre-POWER3 can sustain unaligned load?

#                       -m64    -m32
# ----------------------------------
# PPC970,gcc-4.0.0      +76%    +59%
# Power6,xlc-7          +68%    +33%

# Target "flavour" (e.g. linux64le) is the first command-line argument.
# It selects the word size below and is later handed to ppc-xlate.pl.
$flavour = shift;

# Choose the pointer size, the offset at which the link register is saved
# relative to the caller's frame, and the pointer-sized compare / store-with-
# update / load / store mnemonics used by the generated code.
if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$UCMP	="cmpld";
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$UCMP	="cmplw";
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
} else { die "nonsense $flavour"; }

# Define endianness based on flavour
# i.e.: linux64le
# The value is only ever tested for truth; it is non-zero ($SIZE_T) when the
# flavour name ends in "le".
$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

# Locate ppc-xlate.pl: first next to this script, then in ../../perlasm/
# relative to it.  $dir keeps its trailing path separator and is empty when
# the script path has no directory component (the match result is checked
# explicitly instead of reading a possibly stale $1).
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Redirect STDOUT into the translator, passing it the flavour and the next
# command-line argument (presumably the output file name).  The
# low-precedence "or" is essential: with "||" the die would attach to the
# command string, which is always true, so a failed open went unreported.
open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";

# Stack frame size, and the offset inside the frame of the 64-byte scratch
# buffer into which a page-crossing input block is copied before hashing.
$FRAME=24*$SIZE_T+64;
$LOCALS=6*$SIZE_T;

# Register allocation.
$K  ="r0";	# current round constant
$sp ="r1";	# stack pointer
$toc="r2";
$ctx="r3";	# 1st argument: pointer to the five 32-bit state words
$inp="r4";	# 2nd argument: input pointer
$num="r5";	# 3rd argument: number of 64-byte blocks
$t0 ="r15";	# scratch
$t1 ="r6";	# scratch

# Working variables; $T is the spare register that receives the new value
# in each round.
$A  ="r7";
$B  ="r8";
$C  ="r9";
$D  ="r10";
$E  ="r11";
$T  ="r12";

# @V is rotated by one position after every round, so the roles of these six
# registers shift instead of the values being moved.
@V=($A,$B,$C,$D,$E,$T);
# Sixteen-word message schedule kept entirely in registers.
@X=("r16","r17","r18","r19","r20","r21","r22","r23",
    "r24","r25","r26","r27","r28","r29","r30","r31");

# Append to $code the instructions that load the big-endian 32-bit word at
# $src into $dst.
#   $dst      - destination register
#   $src      - memory operand, e.g. "0(r4)"
#   $temp_reg - scratch register; written only on little-endian flavours
# On big-endian flavours a single lwz is emitted.  On little-endian flavours
# the word is loaded into $temp_reg and byte-reversed into $dst: rotating
# left by 8 puts two of the bytes in place, and the two rlwimi instructions
# insert the remaining two from the word rotated left by 24.
sub loadbe {
my ($dst, $src, $temp_reg) = @_;
$code.=<<___ if (!$LITTLE_ENDIAN);
	lwz	$dst,$src
___
$code.=<<___ if ($LITTLE_ENDIAN);
	lwz	$temp_reg,$src
	rotlwi	$dst,$temp_reg,8
	rlwimi	$dst,$temp_reg,24,0,7
	rlwimi	$dst,$temp_reg,24,16,23
___
}

# Append to $code one round of the first group (rounds 0..19).
#   $i            - round number
#   $a,$b,$c,$d,$e - registers holding the five working variables
#   $f            - spare register that receives the new value
# The emitted code computes
#   $f = $K + $e + X[i] + rotl($a,5) + (($b & $c) | (~$b & $d))
# and rotates $b left by 30; $e is reused to hold rotl($a,5).  The caller
# rotates the register list after each round.
# For rounds below 15 the next input word X[i+1] is loaded ahead of use
# (round 0 also loads X[0]); from round 15 on, X[(i+1)%16] is instead
# updated in place from the schedule words at offsets +2, +8 and +13
# (mod 16) and rotated left by 1, interleaved with the round arithmetic.
sub BODY_00_19 {
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $j=$i+1;

	# Since the last value of $f is discarded, we can use
	# it as a temp reg to swap byte-order when needed.
	loadbe("@X[$i]","`$i*4`($inp)",$f) if ($i==0);
	loadbe("@X[$j]","`$j*4`($inp)",$f) if ($i<15);
$code.=<<___ if ($i<15);
	add	$f,$K,$e
	rotlwi	$e,$a,5
	add	$f,$f,@X[$i]
	and	$t0,$c,$b
	add	$f,$f,$e
	andc	$t1,$d,$b
	rotlwi	$b,$b,30
	or	$t0,$t0,$t1
	add	$f,$f,$t0
___
$code.=<<___ if ($i>=15);
	add	$f,$K,$e
	rotlwi	$e,$a,5
	xor	@X[$j%16],@X[$j%16],@X[($j+2)%16]
	add	$f,$f,@X[$i%16]
	and	$t0,$c,$b
	xor	@X[$j%16],@X[$j%16],@X[($j+8)%16]
	add	$f,$f,$e
	andc	$t1,$d,$b
	rotlwi	$b,$b,30
	or	$t0,$t0,$t1
	xor	@X[$j%16],@X[$j%16],@X[($j+13)%16]
	add	$f,$f,$t0
	rotlwi	@X[$j%16],@X[$j%16],1
___
}

# Append to $code one round of the parity groups; the driver uses this sub
# for rounds 20..39 and again for rounds 60..79.
#   $i            - round number
#   $a,$b,$c,$d,$e - registers holding the five working variables
#   $f            - spare register that receives the new value
# The emitted code computes
#   $f = $K + $e + X[i%16] + ($b ^ $c ^ $d) + rotl($a,5)
# and rotates $b left by 30; $e is reused to hold rotl($a,5).
# For every round but the last, X[(i+1)%16] is updated in place for the next
# round.  In round 79 there is no next schedule word, so those slots are
# filled with loads of the five state words from $ctx into r16..r20, ready
# for the final accumulation done by the caller.
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
	add	$f,$K,$e
	xor	$t0,$b,$d
	rotlwi	$e,$a,5
	xor	@X[$j%16],@X[$j%16],@X[($j+2)%16]
	add	$f,$f,@X[$i%16]
	xor	$t0,$t0,$c
	xor	@X[$j%16],@X[$j%16],@X[($j+8)%16]
	add	$f,$f,$t0
	rotlwi	$b,$b,30
	xor	@X[$j%16],@X[$j%16],@X[($j+13)%16]
	add	$f,$f,$e
	rotlwi	@X[$j%16],@X[$j%16],1
___
$code.=<<___ if ($i==79);
	add	$f,$K,$e
	xor	$t0,$b,$d
	rotlwi	$e,$a,5
	lwz	r16,0($ctx)
	add	$f,$f,@X[$i%16]
	xor	$t0,$t0,$c
	lwz	r17,4($ctx)
	add	$f,$f,$t0
	rotlwi	$b,$b,30
	lwz	r18,8($ctx)
	lwz	r19,12($ctx)
	add	$f,$f,$e
	lwz	r20,16($ctx)
___
}

# Append to $code one round of the majority group (rounds 40..59).
#   $i            - round number
#   $a,$b,$c,$d,$e - registers holding the five working variables
#   $f            - spare register that receives the new value
# The emitted code computes
#   $f = $K + $e + X[i%16] + rotl($a,5) + (($b & $c) | (($b | $c) & $d))
# and rotates $b left by 30; $e is reused to hold rotl($a,5).  Interleaved
# with that, X[(i+1)%16] is updated in place from the schedule words at
# offsets +2, +8 and +13 (mod 16) and rotated left by 1.
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e,$f)=@_;
my $j=$i+1;
$code.=<<___;
	add	$f,$K,$e
	rotlwi	$e,$a,5
	xor	@X[$j%16],@X[$j%16],@X[($j+2)%16]
	add	$f,$f,@X[$i%16]
	and	$t0,$b,$c
	xor	@X[$j%16],@X[$j%16],@X[($j+8)%16]
	add	$f,$f,$e
	or	$t1,$b,$c
	rotlwi	$b,$b,30
	xor	@X[$j%16],@X[$j%16],@X[($j+13)%16]
	and	$t1,$t1,$d
	or	$t0,$t0,$t1
	rotlwi	@X[$j%16],@X[$j%16],1
	add	$f,$f,$t0
___
}

# Public entry point.  This assignment starts $code afresh with the
# prologue, dispatch logic and epilogue of sha1_block_data_order:
#   - allocate the frame, save r15..r31 and the link register;
#   - load the five state words from $ctx into $A..$E;
#   - if $inp is 4-byte aligned, hash all $num blocks in one call to
#     Lsha1_block_private (block count passed in CTR);
#   - otherwise hash the blocks that lie before the next 4096-byte page
#     boundary directly, copy the block that straddles the boundary byte by
#     byte into the scratch area at $sp+$LOCALS and hash it from there,
#     then repeat until $num reaches zero;
#   - restore the saved registers and return.
# Expressions in backticks are evaluated when the text is post-processed.
$code=<<___;
.machine	"any"
.text

.globl	.sha1_block_data_order
.align	4
.sha1_block_data_order:
	$STU	$sp,-$FRAME($sp)
	mflr	r0
	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
	$PUSH	r0,`$FRAME+$LRSAVE`($sp)
	lwz	$A,0($ctx)
	lwz	$B,4($ctx)
	lwz	$C,8($ctx)
	lwz	$D,12($ctx)
	lwz	$E,16($ctx)
	andi.	r0,$inp,3
	bne	Lunaligned
Laligned:
	mtctr	$num
	bl	Lsha1_block_private
	b	Ldone

; PowerPC specification allows an implementation to be ill-behaved
; upon unaligned access which crosses page boundary. "Better safe
; than sorry" principle makes me treat it specially. But I don't
; look for particular offending word, but rather for 64-byte input
; block which crosses the boundary. Once found that block is aligned
; and hashed separately...
.align	4
Lunaligned:
	subfic	$t1,$inp,4096
	andi.	$t1,$t1,4095	; distance to closest page boundary
	srwi.	$t1,$t1,6	; t1/=64
	beq	Lcross_page
	$UCMP	$num,$t1
	ble	Laligned	; didn't cross the page boundary
	mtctr	$t1
	subfc	$num,$t1,$num
	bl	Lsha1_block_private
Lcross_page:
	li	$t1,16
	mtctr	$t1
	addi	r20,$sp,$LOCALS	; spot within the frame
Lmemcpy:
	lbz	r16,0($inp)
	lbz	r17,1($inp)
	lbz	r18,2($inp)
	lbz	r19,3($inp)
	addi	$inp,$inp,4
	stb	r16,0(r20)
	stb	r17,1(r20)
	stb	r18,2(r20)
	stb	r19,3(r20)
	addi	r20,r20,4
	bdnz	Lmemcpy

	$PUSH	$inp,`$FRAME-$SIZE_T*18`($sp)
	li	$t1,1
	addi	$inp,$sp,$LOCALS
	mtctr	$t1
	bl	Lsha1_block_private
	$POP	$inp,`$FRAME-$SIZE_T*18`($sp)
	addic.	$num,$num,-1
	bne	Lunaligned

Ldone:
	$POP	r0,`$FRAME+$LRSAVE`($sp)
	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
	mtlr	r0
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,18,3,0
	.long	0
___

# This is private block function, which uses tailored calling
# interface, namely upon entry SHA_CTX is pre-loaded to given
# registers and counter register contains amount of chunks to
# digest...
#
# The 80 rounds are fully unrolled at generation time.  $i deliberately
# carries over from one loop to the next, and @V is rotated by one position
# after every round so that the register which just received the new value
# becomes "a" for the following round.
$code.=<<___;
.align	4
Lsha1_block_private:
___
$code.=<<___;	# load K_00_19
	lis	$K,0x5a82
	ori	$K,$K,0x7999
___
for($i=0;$i<20;$i++)	{ BODY_00_19($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;	# load K_20_39
	lis	$K,0x6ed9
	ori	$K,$K,0xeba1
___
for(;$i<40;$i++)	{ BODY_20_39($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;	# load K_40_59
	lis	$K,0x8f1b
	ori	$K,$K,0xbcdc
___
for(;$i<60;$i++)	{ BODY_40_59($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;	# load K_60_79
	lis	$K,0xca62
	ori	$K,$K,0xc1d6
___
for(;$i<80;$i++)	{ BODY_20_39($i,@V); unshift(@V,pop(@V)); }
# After 80 rotations of the six-entry register list the working variables
# a..e live in $E,$T,$A,$B,$C.  r16..r20 already hold the previous state
# (loaded during round 79), so the sums below are the new state; it is
# stored to $ctx and also moved back into $A..$E for the next block.
$code.=<<___;
	add	r16,r16,$E
	add	r17,r17,$T
	add	r18,r18,$A
	add	r19,r19,$B
	add	r20,r20,$C
	stw	r16,0($ctx)
	mr	$A,r16
	stw	r17,4($ctx)
	mr	$B,r17
	stw	r18,8($ctx)
	mr	$C,r18
	stw	r19,12($ctx)
	mr	$D,r19
	stw	r20,16($ctx)
	mr	$E,r20
	addi	$inp,$inp,`16*4`
	bdnz	Lsha1_block_private
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.size	.sha1_block_data_order,.-.sha1_block_data_order
___
$code.=<<___;
.asciz	"SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
___

# Replace every `...` expression embedded in the generated text with the
# result of evaluating it, then write the text to STDOUT.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
# Check the close: buffered write errors surface here, and close on a piped
# handle also returns false when the child process exits non-zero (in which
# case $! may be 0 and $? carries the status).
close STDOUT or die "error closing STDOUT: $! (exit status $?)";