]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/sha/asm/sha1-s390x.pl
Update copyright year
[thirdparty/openssl.git] / crypto / sha / asm / sha1-s390x.pl
1 #! /usr/bin/env perl
2 # Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
3 #
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
8
9
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
16
17 # SHA1 block procedure for s390x.
18
19 # April 2007.
20 #
21 # Performance is >30% better than gcc 3.3 generated code. But the real
22 # twist is that SHA1 hardware support is detected and utilized. In
23 # which case performance can reach further >4.5x for larger chunks.
24
25 # January 2009.
26 #
27 # Optimize Xupdate for amount of memory references and reschedule
28 # instructions to favour dual-issue z10 pipeline. On z10 hardware is
29 # "only" ~2.3x faster than software.
30
31 # November 2010.
32 #
33 # Adapt for -m31 build. If kernel supports what's called "highgprs"
34 # feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
35 # instructions and achieve "64-bit" performance even in 31-bit legacy
36 # application context. The feature is not specific to any particular
37 # processor, as long as it's "z-CPU". Latter implies that the code
38 # remains z/Architecture specific. On z990 it was measured to perform
39 # 23% better than code generated by gcc 4.3.
40
41 $kimdfunc=1; # magic function code for kimd instruction
42
43 # $output is the last argument if it looks like a file (it has an extension)
44 # $flavour is the first argument if it doesn't look like a file
45 $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
46 $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
47
48 if ($flavour =~ /3[12]/) {
49 $SIZE_T=4;
50 $g="";
51 } else {
52 $SIZE_T=8;
53 $g="g";
54 }
55
56 $output and open STDOUT,">$output";
57
58 $K_00_39="%r0"; $K=$K_00_39;
59 $K_40_79="%r1";
60 $ctx="%r2"; $prefetch="%r2";
61 $inp="%r3";
62 $len="%r4";
63
64 $A="%r5";
65 $B="%r6";
66 $C="%r7";
67 $D="%r8";
68 $E="%r9"; @V=($A,$B,$C,$D,$E);
69 $t0="%r10";
70 $t1="%r11";
71 @X=("%r12","%r13","%r14");
72 $sp="%r15";
73
74 $stdframe=16*$SIZE_T+4*8;
75 $frame=$stdframe+16*4;
76
77 sub Xupdate {
78 my $i=shift;
79
80 $code.=<<___ if ($i==15);
81 lg $prefetch,$stdframe($sp) ### Xupdate(16) warm-up
82 lr $X[0],$X[2]
83 ___
84 return if ($i&1); # Xupdate is vectorized and executed every 2nd cycle
85 $code.=<<___ if ($i<16);
86 lg $X[0],`$i*4`($inp) ### Xload($i)
87 rllg $X[1],$X[0],32
88 ___
89 $code.=<<___ if ($i>=16);
90 xgr $X[0],$prefetch ### Xupdate($i)
91 lg $prefetch,`$stdframe+4*(($i+2)%16)`($sp)
92 xg $X[0],`$stdframe+4*(($i+8)%16)`($sp)
93 xgr $X[0],$prefetch
94 rll $X[0],$X[0],1
95 rllg $X[1],$X[0],32
96 rll $X[1],$X[1],1
97 rllg $X[0],$X[1],32
98 lr $X[2],$X[1] # feedback
99 ___
100 $code.=<<___ if ($i<=70);
101 stg $X[0],`$stdframe+4*($i%16)`($sp)
102 ___
103 unshift(@X,pop(@X));
104 }
105
106 sub BODY_00_19 {
107 my ($i,$a,$b,$c,$d,$e)=@_;
108 my $xi=$X[1];
109
110 &Xupdate($i);
111 $code.=<<___;
112 alr $e,$K ### $i
113 rll $t1,$a,5
114 lr $t0,$d
115 xr $t0,$c
116 alr $e,$t1
117 nr $t0,$b
118 alr $e,$xi
119 xr $t0,$d
120 rll $b,$b,30
121 alr $e,$t0
122 ___
123 }
124
125 sub BODY_20_39 {
126 my ($i,$a,$b,$c,$d,$e)=@_;
127 my $xi=$X[1];
128
129 &Xupdate($i);
130 $code.=<<___;
131 alr $e,$K ### $i
132 rll $t1,$a,5
133 lr $t0,$b
134 alr $e,$t1
135 xr $t0,$c
136 alr $e,$xi
137 xr $t0,$d
138 rll $b,$b,30
139 alr $e,$t0
140 ___
141 }
142
143 sub BODY_40_59 {
144 my ($i,$a,$b,$c,$d,$e)=@_;
145 my $xi=$X[1];
146
147 &Xupdate($i);
148 $code.=<<___;
149 alr $e,$K ### $i
150 rll $t1,$a,5
151 lr $t0,$b
152 alr $e,$t1
153 or $t0,$c
154 lr $t1,$b
155 nr $t0,$d
156 nr $t1,$c
157 alr $e,$xi
158 or $t0,$t1
159 rll $b,$b,30
160 alr $e,$t0
161 ___
162 }
163
164 $code.=<<___;
165 #include "s390x_arch.h"
166
167 .text
168 .align 64
169 .type Ktable,\@object
170 Ktable: .long 0x5a827999,0x6ed9eba1,0x8f1bbcdc,0xca62c1d6
171 .skip 48 #.long 0,0,0,0,0,0,0,0,0,0,0,0
172 .size Ktable,.-Ktable
173 .globl sha1_block_data_order
174 .type sha1_block_data_order,\@function
175 sha1_block_data_order:
176 ___
177 $code.=<<___ if ($kimdfunc);
178 larl %r1,OPENSSL_s390xcap_P
179 lg %r0,S390X_KIMD(%r1) # check kimd capabilities
180 tmhh %r0,`0x8000>>$kimdfunc`
181 jz .Lsoftware
182 lghi %r0,$kimdfunc
183 lgr %r1,$ctx
184 lgr %r2,$inp
185 sllg %r3,$len,6
186 .long 0xb93e0002 # kimd %r0,%r2
187 brc 1,.-4 # pay attention to "partial completion"
188 br %r14
189 .align 16
190 .Lsoftware:
191 ___
192 $code.=<<___;
193 lghi %r1,-$frame
194 st${g} $ctx,`2*$SIZE_T`($sp)
195 stm${g} %r6,%r15,`6*$SIZE_T`($sp)
196 lgr %r0,$sp
197 la $sp,0(%r1,$sp)
198 st${g} %r0,0($sp)
199
200 larl $t0,Ktable
201 llgf $A,0($ctx)
202 llgf $B,4($ctx)
203 llgf $C,8($ctx)
204 llgf $D,12($ctx)
205 llgf $E,16($ctx)
206
207 lg $K_00_39,0($t0)
208 lg $K_40_79,8($t0)
209
210 .Lloop:
211 rllg $K_00_39,$K_00_39,32
212 ___
213 for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
214 $code.=<<___;
215 rllg $K_00_39,$K_00_39,32
216 ___
217 for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
218 $code.=<<___; $K=$K_40_79;
219 rllg $K_40_79,$K_40_79,32
220 ___
221 for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
222 $code.=<<___;
223 rllg $K_40_79,$K_40_79,32
224 ___
225 for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
226 $code.=<<___;
227
228 l${g} $ctx,`$frame+2*$SIZE_T`($sp)
229 la $inp,64($inp)
230 al $A,0($ctx)
231 al $B,4($ctx)
232 al $C,8($ctx)
233 al $D,12($ctx)
234 al $E,16($ctx)
235 st $A,0($ctx)
236 st $B,4($ctx)
237 st $C,8($ctx)
238 st $D,12($ctx)
239 st $E,16($ctx)
240 brct${g} $len,.Lloop
241
242 lm${g} %r6,%r15,`$frame+6*$SIZE_T`($sp)
243 br %r14
244 .size sha1_block_data_order,.-sha1_block_data_order
245 .string "SHA1 block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>"
246 ___
247
248 $code =~ s/\`([^\`]*)\`/eval $1/gem;
249
250 print $code;
251 close STDOUT or die "error closing STDOUT: $!";