]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/sha/asm/sha1-s390x.pl
Add OpenSSL copyright to .pl files
[thirdparty/openssl.git] / crypto / sha / asm / sha1-s390x.pl
CommitLineData
6aa36e8e
RS
1#! /usr/bin/env perl
2# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the OpenSSL license (the "License"). You may not use
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
a2a54ffc
AP
9
10# ====================================================================
11# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16
17# SHA1 block procedure for s390x.
18
19# April 2007.
20#
21# Performance is >30% better than gcc 3.3 generated code. But the real
22# twist is that SHA1 hardware support is detected and utilized. In
251718e4 23# which case performance can reach further >4.5x for larger chunks.
a2a54ffc 24
8626230a
AP
25# January 2009.
26#
27# Optimize Xupdate for amount of memory references and reschedule
28# instructions to favour dual-issue z10 pipeline. On z10 hardware is
29# "only" ~2.3x faster than software.
30
e822c756
AP
31# November 2010.
32#
33# Adapt for -m31 build. If kernel supports what's called "highgprs"
34# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
35# instructions and achieve "64-bit" performance even in 31-bit legacy
36# application context. The feature is not specific to any particular
37# processor, as long as it's "z-CPU". Latter implies that the code
d900a015 38# remains z/Architecture specific. On z990 it was measured to perform
da3bd277 39# 23% better than code generated by gcc 4.3.
e822c756 40
a2a54ffc
AP
# Function code loaded into %r0 for the kimd instruction.  A false value
# leaves the hardware-assisted path out of the generated code.
$kimdfunc=1;	# magic function code for kimd instruction

# The first command-line argument selects the flavour: anything matching
# /3[12]/ gives 4-byte pointers and no mnemonic suffix, everything else
# gives 8-byte pointers and the "g" suffix on load/store mnemonics.
$flavour = shift;

if ($flavour =~ /3[12]/) {
	$SIZE_T=4;
	$g="";
} else {
	$SIZE_T=8;
	$g="g";
}

# Consume the remaining arguments until one looks like a file name with
# an extension; $output ends up false when there is no such argument.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}

# Redirect STDOUT only when an output file was actually named, and fail
# loudly if it cannot be created instead of silently writing the
# generated assembly to the inherited STDOUT.  The three-argument form
# keeps characters in the file name from being read as part of the mode.
if ($output) {
	open(STDOUT,">",$output) or die "can't open $output: $!";
}
55
8626230a
AP
# Register allocation for the generated code.

# Registers holding the round constants; $K is the one the round bodies
# currently add, and starts out as the register for rounds 0..39.
($K_00_39,$K_40_79)=("%r0","%r1");
$K=$K_00_39;

# Argument registers.  $prefetch names the same register as $ctx.
($ctx,$inp,$len)=("%r2","%r3","%r4");
$prefetch="%r2";

# The five working variables, and the list that is rotated per round.
($A,$B,$C,$D,$E)=map { "%r$_" } 5..9;
@V=($A,$B,$C,$D,$E);

# Scratch registers and the three registers Xupdate cycles through.
($t0,$t1)=("%r10","%r11");
@X=map { "%r$_" } 12..14;

$sp="%r15";

# $stdframe is 16 pointer-sized slots plus four 8-byte slots; $frame adds
# 16 4-byte words, which Xupdate addresses at $stdframe+4*(n%16).
$stdframe=16*$SIZE_T+4*8;
$frame=$stdframe+16*4;
a2a54ffc 74
a2a54ffc
AP
# Xupdate($i) - append the message-schedule code for round $i to $code.
#
# Work is emitted on even rounds only, using 64-bit instructions that
# cover two 32-bit words at a time.  Odd rounds emit nothing, except for
# round 15, which emits the code that primes $prefetch and $X[0] for
# Xupdate(16).  On even rounds @X is rotated by one position at the end,
# so a caller that needs the register for the current round has to pick
# it up before calling.
sub Xupdate {
my ($round)=@_;

if ($round==15) {
$code.=<<___;
 lg $prefetch,$stdframe($sp) ### Xupdate(16) warm-up
 lr $X[0],$X[2]
___
}

return if ($round%2);	# Xupdate is vectorized and executed every 2nd cycle

if ($round<16) {
# Rounds 0..14: load 8 bytes of input, keep a 32-bit-rotated copy.
$code.=<<___;
 lg $X[0],`$round*4`($inp) ### Xload($round)
 rllg $X[1],$X[0],32
___
} else {
# Rounds 16..78: combine earlier schedule words kept on the stack.
$code.=<<___;
 xgr $X[0],$prefetch ### Xupdate($round)
 lg $prefetch,`$stdframe+4*(($round+2)%16)`($sp)
 xg $X[0],`$stdframe+4*(($round+8)%16)`($sp)
 xgr $X[0],$prefetch
 rll $X[0],$X[0],1
 rllg $X[1],$X[0],32
 rll $X[1],$X[1],1
 rllg $X[0],$X[1],32
 lr $X[2],$X[1] # feedback
___
}

# Nothing is written back for the last few even rounds.
if ($round<=70) {
$code.=<<___;
 stg $X[0],`$stdframe+4*($round%16)`($sp)
___
}

unshift(@X,pop(@X));
}
8626230a
AP
103
# BODY_00_19($i,$a,$b,$c,$d,$e) - append the code for round $i (the
# driver uses this for rounds 0..19) to $code.
#
# The emitted instructions add $K, $a rotated left by 5, the schedule
# register and ((d xor c) and b) xor d to $e, and rotate $b left by 30.
# $t0 and $t1 are used as scratch.
sub BODY_00_19 {
my ($i,$va,$vb,$vc,$vd,$ve)=@_;
my $w=$X[1];	# taken before Xupdate, which may rotate \@X

	Xupdate($i);
$code.=<<___;
 alr $ve,$K ### $i
 rll $t1,$va,5
 lr $t0,$vd
 xr $t0,$vc
 alr $ve,$t1
 nr $t0,$vb
 alr $ve,$w
 xr $t0,$vd
 rll $vb,$vb,30
 alr $ve,$t0
___
}
122
a2a54ffc
AP
# BODY_20_39($i,$a,$b,$c,$d,$e) - append the code for round $i (the
# driver uses this for rounds 20..39 and 60..79) to $code.
#
# The emitted instructions add $K, $a rotated left by 5, the schedule
# register and b xor c xor d to $e, and rotate $b left by 30.  $t0 and
# $t1 are used as scratch.
sub BODY_20_39 {
my ($i,$va,$vb,$vc,$vd,$ve)=@_;
my $w=$X[1];	# taken before Xupdate, which may rotate \@X

	Xupdate($i);
$code.=<<___;
 alr $ve,$K ### $i
 rll $t1,$va,5
 lr $t0,$vb
 alr $ve,$t1
 xr $t0,$vc
 alr $ve,$w
 xr $t0,$vd
 rll $vb,$vb,30
 alr $ve,$t0
___
}
140
# BODY_40_59($i,$a,$b,$c,$d,$e) - append the code for round $i (the
# driver uses this for rounds 40..59) to $code.
#
# The emitted instructions add $K, $a rotated left by 5, the schedule
# register and ((b or c) and d) or (b and c) to $e, and rotate $b left
# by 30.  $t0 and $t1 are used as scratch; $t1 is reused for (b and c)
# once the rotated $a has been added.
sub BODY_40_59 {
my ($i,$va,$vb,$vc,$vd,$ve)=@_;
my $w=$X[1];	# taken before Xupdate, which may rotate \@X

	Xupdate($i);
$code.=<<___;
 alr $ve,$K ### $i
 rll $t1,$va,5
 lr $t0,$vb
 alr $ve,$t1
 or $t0,$vc
 lr $t1,$vb
 nr $t0,$vd
 nr $t1,$vc
 alr $ve,$w
 or $t0,$t1
 rll $vb,$vb,30
 alr $ve,$t0
___
}
161
# Emit the constant table and the entry label.  The table holds the four
# 32-bit round constants followed by 48 bytes of padding, which makes it
# 64 bytes long to match its alignment.
{
my $k_words=join(",",map { sprintf("0x%08x",$_) }
		 0x5a827999,0x6ed9eba1,0x8f1bbcdc,0xca62c1d6);

$code.=<<___;
.text
.align 64
.type Ktable,\@object
Ktable: .long $k_words
 .skip 48 #.long 0,0,0,0,0,0,0,0,0,0,0,0
.size Ktable,.-Ktable
.globl sha1_block_data_order
.type sha1_block_data_order,\@function
sha1_block_data_order:
___
}
# Hardware path, emitted only when $kimdfunc is set.  The generated code
# tests two words of OPENSSL_s390xcap_P and branches to .Lsoftware if
# either test fails.  Otherwise it moves the arguments into the registers
# used by the hand-encoded kimd instruction (shifting $len left by 6,
# i.e. multiplying it by 64), repeats the instruction while condition
# code 3 is set, and returns.
if ($kimdfunc) {
$code.=<<___;
 larl %r1,OPENSSL_s390xcap_P
 lg %r0,0(%r1)
 tmhl %r0,0x4000 # check for message-security assist
 jz .Lsoftware
 lg %r0,16(%r1) # check kimd capabilities
 tmhh %r0,`0x8000>>$kimdfunc`
 jz .Lsoftware
 lghi %r0,$kimdfunc
 lgr %r1,$ctx
 lgr %r2,$inp
 sllg %r3,$len,6
 .long 0xb93e0002 # kimd %r0,%r2
 brc 1,.-4 # pay attention to "partial completion"
 br %r14
.align 16
.Lsoftware:
___
}
# Software path prologue.  $ctx is saved in the caller's frame because
# its register doubles as $prefetch inside the rounds; %r6-%r15 are
# saved, a new frame of $frame bytes is allocated and the old stack
# pointer is stored at its base.
$code.=<<___;
 lghi %r1,-$frame
 st${g} $ctx,`2*$SIZE_T`($sp)
 stm${g} %r6,%r15,`6*$SIZE_T`($sp)
 lgr %r0,$sp
 la $sp,0(%r1,$sp)
 st${g} %r0,0($sp)

 larl $t0,Ktable
___

# Load the five 32-bit state words from consecutive offsets of $ctx.
{
my $off=0;
for my $reg ($A,$B,$C,$D,$E) {
	$code.=" llgf $reg,$off($ctx)\n";
	$off+=4;
}
}

# Each constant register receives 8 bytes of Ktable, i.e. two constants.
# The 32-bit rotate at the top of the loop swaps the two halves.
$code.=<<___;

 lg $K_00_39,0($t0)
 lg $K_40_79,8($t0)

.Lloop:
 rllg $K_00_39,$K_00_39,32
___
# Emit the 80 rounds.  After every round @V is rotated by one position,
# so the register that played "e" becomes "a" for the next round.  A
# 32-bit rotate of the active constant register is emitted between the
# groups of 20 rounds, and $K switches to $K_40_79 from round 40 on.
{
# Emit rounds from the current $i up to (not including) $end via $body.
my $emit_rounds=sub {
	my ($end,$body)=@_;
	while ($i<$end) {
		$body->($i,@V);
		unshift(@V,pop(@V));
		$i++;
	}
};

$i=0;
$emit_rounds->(20,\&BODY_00_19);
$code.=<<___;
 rllg $K_00_39,$K_00_39,32
___
$emit_rounds->(40,\&BODY_20_39);
$code.=<<___;
 rllg $K_40_79,$K_40_79,32
___
$K=$K_40_79;
$emit_rounds->(60,\&BODY_40_59);
$code.=<<___;
 rllg $K_40_79,$K_40_79,32
___
$emit_rounds->(80,\&BODY_20_39);
}
# Loop tail and epilogue.  $ctx is reloaded from the slot the prologue
# saved it in (now $frame bytes further from $sp), the input pointer is
# advanced by 64 bytes, the five working registers are added to the
# state words and stored back, and the loop repeats while $len counts
# down.  Afterwards %r6-%r15 are restored and control returns through
# %r14.
{
my @state=($A,$B,$C,$D,$E);

$code.=<<___;

 l${g} $ctx,`$frame+2*$SIZE_T`($sp)
 la $inp,64($inp)
___

# All five additions first, then all five stores.
for my $op ("al","st") {
	for my $n (0..$#state) {
		$code.=sprintf(" %s %s,%d(%s)\n",$op,$state[$n],4*$n,$ctx);
	}
}

$code.=<<___;
 brct${g} $len,.Lloop

 lm${g} %r6,%r15,`$frame+6*$SIZE_T`($sp)
 br %r14
.size sha1_block_data_order,.-sha1_block_data_order
.string "SHA1 block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>"
.comm OPENSSL_s390xcap_P,80,8
___
}
247
# Replace every `...` span in the generated text with the result of
# evaluating its contents as a Perl expression; by this point the spans
# contain plain arithmetic such as frame offsets.
$code =~ s/\`([^\`]*)\`/eval $1/gem;

print $code;

# Buffered write errors only surface at close time, so a failure here
# must abort rather than leave a truncated output file behind.
close STDOUT or die "error closing STDOUT: $!";