#! /usr/bin/env perl
# Copyright 2012-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# October 2012
#
# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
# the time being... Except that it has two code paths: one suitable
# for all SPARCv9 processors and one for VIS3-capable ones. Former
# delivers ~25-45% more, more for longer keys, heaviest DH and DSA
# verify operations on venerable UltraSPARC II. On T4 VIS3 code is
# ~100-230% faster than gcc-generated code and ~35-90% faster than
# the pure SPARCv9 code path.

# The last command-line argument, when present, names the output file and
# STDOUT is redirected to it; with no argument the generated code goes to
# the inherited STDOUT. The three-argument form of open keeps the file name
# from being parsed as part of the mode, and a failed open is reported
# immediately instead of surfacing later as a close error.
$output = pop;
if ($output) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}

# Bytes reserved on the stack (below STACK_FRAME) for the lookup table:
# 16 entries of 8 bytes, written as tab[0]..tab[15] by the template below.
$locals=16*8;

# Register holding the base address of that table.
$tab="%l0";

# Two pairs of scratch registers. The unrolled loop further down rotates
# each pair after every step, so consecutive steps alternate registers.
@T=("%g2","%g3");
@i=("%g4","%g5");

# Symbolic names for the registers used by the software path.
# $a1..$a48 map to %o0..%o5. Note the deliberate sharing: $hi is the same
# register as $a8 (%o3) and $a is the same register as $lo (%g1).
($a1,$a2,$a4,$a8,$a12,$a48)=map("%o$_",(0..5));
($lo,$hi,$b)=("%g1",$a8,"%o7");	$a=$lo;

# First part of the template: function entry, a run-time test of the
# SPARCV9_VIS3 bit in OPENSSL_sparcv9cap_P[0] selecting either the
# xmulx/xmulxhi path (the two instructions are emitted as .word values) or
# the .Lsoftware path, and the start of the software path, which fills the
# 16-entry table and consumes the first 4-bit groups of b.
# Text between backticks is evaluated as Perl just before printing.
$code.=<<___;
#include <sparc_arch.h>

#ifdef __arch64__
.register %g2,#scratch
.register %g3,#scratch
#endif

#ifdef __PIC__
SPARC_PIC_THUNK(%g1)
#endif

.globl bn_GF2m_mul_2x2
.align 16
bn_GF2m_mul_2x2:
SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
ld [%g1+0],%g1 ! OPENSSL_sparcv9cap_P[0]

andcc %g1, SPARCV9_VIS3, %g0
bz,pn %icc,.Lsoftware
nop

sllx %o1, 32, %o1
sllx %o3, 32, %o3
or %o2, %o1, %o1
or %o4, %o3, %o3
.word 0x95b262ab ! xmulx %o1, %o3, %o2
.word 0x99b262cb ! xmulxhi %o1, %o3, %o4
srlx %o2, 32, %o1 ! 13 cycles later
st %o2, [%o0+0]
st %o1, [%o0+4]
srlx %o4, 32, %o3
st %o4, [%o0+8]
retl
st %o3, [%o0+12]

.align 16
.Lsoftware:
save %sp,-STACK_FRAME-$locals,%sp

sllx %i1,32,$a
mov -1,$a12
sllx %i3,32,$b
or %i2,$a,$a
srlx $a12,1,$a48 ! 0x7fff...
or %i4,$b,$b
srlx $a12,2,$a12 ! 0x3fff...
add %sp,STACK_BIAS+STACK_FRAME,$tab

sllx $a,2,$a4
mov $a,$a1
sllx $a,1,$a2

srax $a4,63,@i[1] ! broadcast 61st bit
and $a48,$a4,$a4 ! (a<<2)&0x7fff...
srlx $a48,2,$a48
srax $a2,63,@i[0] ! broadcast 62nd bit
and $a12,$a2,$a2 ! (a<<1)&0x3fff...
srax $a1,63,$lo ! broadcast 63rd bit
and $a48,$a1,$a1 ! (a<<0)&0x1fff...

sllx $a1,3,$a8
and $b,$lo,$lo
and $b,@i[0],@i[0]
and $b,@i[1],@i[1]

stx %g0,[$tab+0*8] ! tab[0]=0
xor $a1,$a2,$a12
stx $a1,[$tab+1*8] ! tab[1]=a1
stx $a2,[$tab+2*8] ! tab[2]=a2
xor $a4,$a8,$a48
stx $a12,[$tab+3*8] ! tab[3]=a1^a2
xor $a4,$a1,$a1

stx $a4,[$tab+4*8] ! tab[4]=a4
xor $a4,$a2,$a2
stx $a1,[$tab+5*8] ! tab[5]=a1^a4
xor $a4,$a12,$a12
stx $a2,[$tab+6*8] ! tab[6]=a2^a4
xor $a48,$a1,$a1
stx $a12,[$tab+7*8] ! tab[7]=a1^a2^a4
xor $a48,$a2,$a2

stx $a8,[$tab+8*8] ! tab[8]=a8
xor $a48,$a12,$a12
stx $a1,[$tab+9*8] ! tab[9]=a1^a8
xor $a4,$a1,$a1
stx $a2,[$tab+10*8] ! tab[10]=a2^a8
xor $a4,$a2,$a2
stx $a12,[$tab+11*8] ! tab[11]=a1^a2^a8

xor $a4,$a12,$a12
stx $a48,[$tab+12*8] ! tab[12]=a4^a8
srlx $lo,1,$hi
stx $a1,[$tab+13*8] ! tab[13]=a1^a4^a8
sllx $lo,63,$lo
stx $a2,[$tab+14*8] ! tab[14]=a2^a4^a8
srlx @i[0],2,@T[0]
stx $a12,[$tab+15*8] ! tab[15]=a1^a2^a4^a8

sllx @i[0],62,$a1
sllx $b,3,@i[0]
srlx @i[1],3,@T[1]
and @i[0],`0xf<<3`,@i[0]
sllx @i[1],61,$a2
ldx [$tab+@i[0]],@i[0]
srlx $b,4-3,@i[1]
xor @T[0],$hi,$hi
and @i[1],`0xf<<3`,@i[1]
xor $a1,$lo,$lo
ldx [$tab+@i[1]],@i[1]
xor @T[1],$hi,$hi

xor @i[0],$lo,$lo
srlx $b,8-3,@i[0]
xor $a2,$lo,$lo
and @i[0],`0xf<<3`,@i[0]
___

# Unrolled middle section, steps 1 through 13. Each step shifts the table
# entry held in @i[1] into $lo/$hi, loads the entry selected by @i[0], and
# extracts the next 4-bit table index from $b. After emitting a step both
# register pairs are rotated so the next step uses them in swapped roles.
# $n is intentionally a file-level variable: it is 14 when the loop ends
# and the closing template below depends on that value.
for($n=1;$n<14;$n++) {
$code.=<<___;
sllx @i[1],`$n*4`,@T[0]
ldx [$tab+@i[0]],@i[0]
srlx @i[1],`64-$n*4`,@T[1]
xor @T[0],$lo,$lo
srlx $b,`($n+2)*4`-3,@i[1]
xor @T[1],$hi,$hi
and @i[1],`0xf<<3`,@i[1]
___
	push(@i,shift(@i)); push(@T,shift(@T));
}

# Closing part of the template: fold in the last two table entries using
# shift counts derived from $n and $n+1, store the result with four st
# instructions at [%i0+0] .. [%i0+12], and return.
$code.=<<___;
sllx @i[1],`$n*4`,@T[0]
ldx [$tab+@i[0]],@i[0]
srlx @i[1],`64-$n*4`,@T[1]
xor @T[0],$lo,$lo

sllx @i[0],`($n+1)*4`,@T[0]
xor @T[1],$hi,$hi
srlx @i[0],`64-($n+1)*4`,@T[1]
xor @T[0],$lo,$lo
xor @T[1],$hi,$hi

srlx $lo,32,%i1
st $lo,[%i0+0]
st %i1,[%i0+4]
srlx $hi,32,%i2
st $hi,[%i0+8]
st %i2,[%i0+12]

ret
restore
.type bn_GF2m_mul_2x2,#function
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
.asciz "GF(2^m) Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
.align 4
___

# Replace every `...` span in the accumulated template with the result of
# evaluating its contents as a Perl expression, then write the text out.
$code =~ s/\`([^\`]*)\`/eval($1)/gem;
print $code;
# Output is buffered, so a write failure may only become visible here.
close STDOUT or die "error closing STDOUT: $!";