#! /usr/bin/env perl
# Copyright 2012-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# October 2012
#
# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
# in bn_gf2m.c. It is a fairly mechanical port from C for the time
# being, except that it has two code paths: one suitable for all
# SPARCv9 processors and one for VIS3-capable ones. The former
# delivers ~25-45% more, more for longer keys, heaviest DH and DSA
# verify operations on venerable UltraSPARC II. On T4 the VIS3 code is
# ~100-230% faster than gcc-generated code and ~35-90% faster than
# the pure SPARCv9 code path.

# The last command-line argument, if any, names the output file; without
# one the generated assembly goes to the inherited STDOUT.
$output = pop and (open(STDOUT, ">", $output) or die "can't open $output: $!");

# Stack space for the 16-entry lookup table of 64-bit values that the
# software path stores at [$tab+0*8] .. [$tab+15*8].
$locals=16*8;

# Base address of that table (set to %sp+STACK_BIAS+STACK_FRAME below).
$tab="%l0";

# Two temporaries and two index/value registers. Both pairs are rotated
# after every iteration of the unrolled loop below, so consecutive steps
# alternate between the two registers of each pair.
@T=("%g2","%g3");
@i=("%g4","%g5");

# Shifted/masked copies of the first operand, held in %o0..%o5 of the
# register window obtained by "save" in the software path.
($a1,$a2,$a4,$a8,$a12,$a48)=map("%o$_",(0..5));
# Result halves and the second operand. Note the deliberate aliasing:
# $hi shares a register with $a8 and $a shares a register with $lo, so
# each alias is only used once the value it overlaps is no longer needed.
($lo,$hi,$b)=("%g1",$a8,"%o7");	$a=$lo;

# Generated routine, as read from the instructions below:
#
#   in:  1st argument register = address receiving four 32-bit words
#        2nd/3rd argument      = upper/lower 32 bits of operand a
#        4th/5th argument      = upper/lower 32 bits of operand b
#   out: words at +0/+4 hold the lower/upper 32 bits of the low 64-bit
#        half of the product, words at +8/+12 those of the high half.
#
# VIS3 path (leaf, no register window): both operands are packed into
# 64-bit registers and multiplied with xmulx/xmulxhi, emitted as .word.
#
# Software path: a is split into a1=a&0x1fff..., a2=(a<<1)&0x3fff...,
# a4=(a<<2)&0x7fff... and a8=a1<<3, and tab[k] is filled with the XOR of
# those terms selected by the bits of k (k=0..15). The three top bits of
# a, dropped by the masks, are handled separately: each is broadcast into
# a full-width mask, ANDed with b and folded into lo/hi with matching
# shifts (63/1, 62/2, 61/3). Then for each 4-bit nibble of b the table
# entry is fetched and XORed into lo/hi, shifted by the nibble position.
$code.=<<___;
#ifndef __ASSEMBLER__
# define __ASSEMBLER__ 1
#endif
#include "crypto/sparc_arch.h"

#ifdef __arch64__
.register	%g2,#scratch
.register	%g3,#scratch
#endif

#ifdef __PIC__
SPARC_PIC_THUNK(%g1)
#endif

.globl	bn_GF2m_mul_2x2
.align	16
bn_GF2m_mul_2x2:
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1             	! OPENSSL_sparcv9cap_P[0]

	! choose the code path from the capability word
        andcc	%g1, SPARCV9_VIS3, %g0
        bz,pn	%icc,.Lsoftware
        nop

	! hardware path: pack 32-bit halves, multiply, store four words
	sllx	%o1, 32, %o1
	sllx	%o3, 32, %o3
	or	%o2, %o1, %o1
	or	%o4, %o3, %o3
	.word	0x95b262ab			! xmulx   %o1, %o3, %o2
	.word	0x99b262cb			! xmulxhi %o1, %o3, %o4
	srlx	%o2, 32, %o1			! 13 cycles later
	st	%o2, [%o0+0]
	st	%o1, [%o0+4]
	srlx	%o4, 32, %o3
	st	%o4, [%o0+8]
	retl
	st	%o3, [%o0+12]

.align	16
.Lsoftware:
	save	%sp,-STACK_FRAME-$locals,%sp

	! pack operands, prepare masks and the table pointer
	sllx	%i1,32,$a
	mov	-1,$a12
	sllx	%i3,32,$b
	or	%i2,$a,$a
	srlx	$a12,1,$a48			! 0x7fff...
	or	%i4,$b,$b
	srlx	$a12,2,$a12			! 0x3fff...
	add	%sp,STACK_BIAS+STACK_FRAME,$tab

	sllx	$a,2,$a4
	mov	$a,$a1
	sllx	$a,1,$a2

	srax	$a4,63,@i[1]			! broadcast 61st bit
	and	$a48,$a4,$a4			! (a<<2)&0x7fff...
	srlx	$a48,2,$a48
	srax	$a2,63,@i[0]			! broadcast 62nd bit
	and	$a12,$a2,$a2			! (a<<1)&0x3fff...
	srax	$a1,63,$lo			! broadcast 63rd bit
	and	$a48,$a1,$a1			! (a<<0)&0x1fff...

	sllx	$a1,3,$a8
	and	$b,$lo,$lo
	and	$b,@i[0],@i[0]
	and	$b,@i[1],@i[1]

	! fill the 16-entry table
	stx	%g0,[$tab+0*8]			! tab[0]=0
	xor	$a1,$a2,$a12
	stx	$a1,[$tab+1*8]			! tab[1]=a1
	stx	$a2,[$tab+2*8]			! tab[2]=a2
	 xor	$a4,$a8,$a48
	stx	$a12,[$tab+3*8]			! tab[3]=a1^a2
	 xor	$a4,$a1,$a1

	stx	$a4,[$tab+4*8]			! tab[4]=a4
	xor	$a4,$a2,$a2
	stx	$a1,[$tab+5*8]			! tab[5]=a1^a4
	xor	$a4,$a12,$a12
	stx	$a2,[$tab+6*8]			! tab[6]=a2^a4
	 xor	$a48,$a1,$a1
	stx	$a12,[$tab+7*8]			! tab[7]=a1^a2^a4
	 xor	$a48,$a2,$a2

	stx	$a8,[$tab+8*8]			! tab[8]=a8
	 xor	$a48,$a12,$a12
	stx	$a1,[$tab+9*8]			! tab[9]=a1^a8
	xor	$a4,$a1,$a1
	stx	$a2,[$tab+10*8]			! tab[10]=a2^a8
	xor	$a4,$a2,$a2
	stx	$a12,[$tab+11*8]		! tab[11]=a1^a2^a8

	xor	$a4,$a12,$a12
	stx	$a48,[$tab+12*8]		! tab[12]=a4^a8
	 srlx	$lo,1,$hi
	stx	$a1,[$tab+13*8]			! tab[13]=a1^a4^a8
	 sllx	$lo,63,$lo
	stx	$a2,[$tab+14*8]			! tab[14]=a2^a4^a8
	 srlx	@i[0],2,@T[0]
	stx	$a12,[$tab+15*8]		! tab[15]=a1^a2^a4^a8

	! fold in the three top bits and start the nibble lookups
	sllx	@i[0],62,$a1
	 sllx	$b,3,@i[0]
	srlx	@i[1],3,@T[1]
	 and	@i[0],`0xf<<3`,@i[0]
	sllx	@i[1],61,$a2
	 ldx	[$tab+@i[0]],@i[0]
	 srlx	$b,4-3,@i[1]
	xor	@T[0],$hi,$hi
	 and	@i[1],`0xf<<3`,@i[1]
	xor	$a1,$lo,$lo
	 ldx	[$tab+@i[1]],@i[1]
	xor	@T[1],$hi,$hi

	xor	@i[0],$lo,$lo
	srlx	$b,8-3,@i[0]
	 xor	$a2,$lo,$lo
	and	@i[0],`0xf<<3`,@i[0]
___
# Unrolled, software-pipelined middle section. Step $n accumulates the
# table value already loaded into @i[1], shifted by 4*$n bits, into
# $lo/$hi; it loads the entry whose byte offset sits in @i[0] and
# extracts the offset for nibble $n+2 of b. The rotation at the end
# swaps the roles of the two registers in each pair for the next step.
for($n=1;$n<14;$n++) {
$code.=<<___;
	sllx	@i[1],`$n*4`,@T[0]
	 ldx	[$tab+@i[0]],@i[0]
	srlx	@i[1],`64-$n*4`,@T[1]
	xor	@T[0],$lo,$lo
	 srlx	$b,`($n+2)*4`-3,@i[1]
	xor	@T[1],$hi,$hi
	 and	@i[1],`0xf<<3`,@i[1]
___
	push(@i,shift(@i)); push(@T,shift(@T));
}
# Pipeline drain: $n is 14 here, so the two remaining table values are
# accumulated with shifts of 56 and 60 bits, after which the 128-bit
# result is written out as four 32-bit words.
$code.=<<___;
	sllx	@i[1],`$n*4`,@T[0]
	 ldx	[$tab+@i[0]],@i[0]
	srlx	@i[1],`64-$n*4`,@T[1]
	xor	@T[0],$lo,$lo

	sllx	@i[0],`($n+1)*4`,@T[0]
	 xor	@T[1],$hi,$hi
	srlx	@i[0],`64-($n+1)*4`,@T[1]
	xor	@T[0],$lo,$lo
	xor	@T[1],$hi,$hi

	srlx	$lo,32,%i1
	st	$lo,[%i0+0]
	st	%i1,[%i0+4]
	srlx	$hi,32,%i2
	st	$hi,[%i0+8]
	st	%i2,[%i0+12]

	ret
	restore
.type	bn_GF2m_mul_2x2,#function
.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
.asciz	"GF(2^m) Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
.align	4
___

# Evaluate every back-quoted Perl expression embedded in the assembly
# (shift counts, masks) and substitute its numeric value.
$code =~ s/\`([^\`]*)\`/eval($1)/gem;
print $code;
close STDOUT or die "error closing STDOUT: $!";