]>
Commit | Line | Data |
---|---|---|
e0a65194 RS |
1 | #! /usr/bin/env perl |
2 | # Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. | |
3 | # | |
367ace68 | 4 | # Licensed under the Apache License 2.0 (the "License"). You may not use |
e0a65194 RS |
5 | # this file except in compliance with the License. You can obtain a copy |
6 | # in the file LICENSE in the source distribution or at | |
7 | # https://www.openssl.org/source/license.html | |
dfeab068 | 8 | |
4d1f3f7a DSH |
# Driver: locate the perlasm support code, redirect STDOUT to the requested
# output file, and emit the four comba routines.

# Directory part of the script path (including the trailing separator), or
# the empty string when the script was invoked without a directory component.
# The match result is checked so that $1 is never read after a failed match.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";

# The last command-line argument, when present, names the output file.
# Three-argument open keeps the file name from being interpreted as a mode,
# and a failed open is reported here instead of surfacing only at the final
# close of STDOUT.
$output = pop;
if ($output) {
    open(STDOUT, ">", $output) or die "can't open $output: $!";
}

&asm_init($ARGV[0]);

&bn_mul_comba("bn_mul_comba8",8);
&bn_mul_comba("bn_mul_comba4",4);
&bn_sqr_comba("bn_sqr_comba8",8);
&bn_sqr_comba("bn_sqr_comba4",4);

&asm_finish();

# Buffered write errors only show up when the handle is closed.
close STDOUT or die "error closing STDOUT: $!";
6bd7a4d9 | 25 | |
dfeab068 RE |
# Emit one multiply-accumulate step: multiply the pre-loaded eax by edx and
# add the edx:eax product into the three-register accumulator ($c2:$c1:$c0).
#
# Positional arguments:
#   $ap, $bp       registers holding the a[] and b[] base pointers
#   $ai, $bi       word indices of the operands (only used in the comment)
#   $c0, $c1, $c2  accumulator registers, least significant first
#   $pos           0  - only preload the next operands (a[$na], b[$nb])
#                  1  - store $c0 to result word $i, then preload a[$na]
#                       and b[$nb]
#                  >1 - store $c0 to result word $i, no preload
#   $i             index of the result word written when $pos > 0
#   $na, $nb       indices of the operand words to preload
#
# The loads are interleaved with the add/adc chain exactly as in the original
# instruction schedule; none of the emitted mov instructions sits between a
# carry producer and its consumer in a way that changes the order.
sub mul_add_c {
    my ($ap, $ai, $bp, $bi, $c0, $c1, $c2, $pos, $i, $na, $nb) = @_;

    my $store_word   = ($pos > 0);
    my $preload_next = ($pos == 0 || $pos == 1);

    &comment("mul a[$ai]*b[$bi]");

    # eax and edx are always pre-loaded by the caller or a previous step.
    &mul("edx");
    &add($c0, "eax");

    if ($store_word) {
        # eax is free now: fetch the result pointer (first argument).
        &mov("eax", &wparam(0));
    }
    elsif ($pos == 0) {
        &mov("eax", &DWP($na * 4, $ap, "", 0));    # load next a
    }

    &adc($c1, "edx");
    &mov("edx", &DWP($nb * 4, $bp, "", 0)) if $preload_next;    # load next b

    &adc($c2, 0);

    if ($store_word) {
        &mov(&DWP($i * 4, "eax", "", 0), $c0);     # save r[$i]
        # The result pointer occupied eax, so a[] is reloaded only now.
        &mov("eax", &DWP($na * 4, $ap, "", 0)) if $pos == 1;
    }
}
53 | ||
# Emit one accumulate step for the squaring routine where the product is
# added once: multiply, then add edx:eax into ($c2:$c1:$c0).
#
# Positional arguments:
#   $r             register holding the result pointer
#   $ap            register holding the a[] base pointer
#   $ai, $bi       word indices of the operands; when they are equal the
#                  emitted instruction is "mul eax", otherwise "mul edx"
#   $c0, $c1, $c2  accumulator registers, least significant first
#   $pos           0  - preload eax with a[$na]
#                  1  - store $c0 to r[$i], preload eax with a[$na] and,
#                       when $na differs from $nb, edx with a[$nb]
#                  >1 - store $c0 to r[$i], no preload
#   $i             index of the result word written when $pos > 0
#   $na, $nb       indices of the words to preload for the next step
sub sqr_add_c {
    my ($r, $ap, $ai, $bi, $c0, $c1, $c2, $pos, $i, $na, $nb) = @_;

    my $column_end = ($pos == 1);

    &comment("sqr a[$ai]*a[$bi]");

    # eax (and edx when the indices differ) are always pre-loaded.
    &mul($ai == $bi ? "eax" : "edx");
    &add($c0, "eax");
    &mov("eax", &DWP($na * 4, $ap, "", 0)) if $pos == 0;    # load next a

    &adc($c1, "edx");
    if ($column_end && $na != $nb) {
        # The next step multiplies two different words, so edx is needed too.
        &mov("edx", &DWP($nb * 4, $ap, "", 0));
    }

    &adc($c2, 0);

    &mov(&DWP($i * 4, $r, "", 0), $c0) if $pos > 0;         # save r[$i]
    &mov("eax", &DWP($na * 4, $ap, "", 0)) if $column_end;  # load next a
}
82 | ||
# Emit one accumulate step for the squaring routine where the product is
# added twice: multiply, double edx:eax, then add it into ($c2:$c1:$c0).
#
# Positional arguments:
#   $r             register holding the result pointer
#   $ap            register holding the a[] base pointer
#   $ai, $bi       word indices of the operands; when they are equal the
#                  emitted instruction is "mul eax", otherwise "mul edx"
#   $c0, $c1, $c2  accumulator registers, least significant first
#   $pos           0 or 1 - preload eax with a[$na] and, when $na differs
#                           from $nb, edx with a[$nb]
#                  > 0    - store $c0 to r[$i]
#   $i             index of the result word written when $pos > 0
#   $na, $nb       indices of the words to preload for the next step
sub sqr_add_c2 {
    my ($r, $ap, $ai, $bi, $c0, $c1, $c2, $pos, $i, $na, $nb) = @_;

    &comment("sqr a[$ai]*a[$bi]");

    # eax and edx are always pre-loaded.
    &mul($ai == $bi ? "eax" : "edx");

    # Double the product in place; the bit shifted out of edx is carried
    # into the top accumulator word.
    &add("eax", "eax");
    &adc("edx", "edx");
    &adc($c2, 0);

    # Add the doubled product to the accumulator.
    &add($c0, "eax");
    &adc($c1, "edx");
    if ($pos == 0 || $pos == 1) {
        &mov("eax", &DWP($na * 4, $ap, "", 0));    # load next a
    }
    &adc($c2, 0);

    &mov(&DWP($i * 4, $r, "", 0), $c0) if $pos > 0;    # save r[$i]
    if ($pos <= 1 && $na != $nb) {
        &mov("edx", &DWP($nb * 4, $ap, "", 0));
    }
}
114 | ||
# Emit the function $name: a fully unrolled column-by-column (comba)
# multiplication of two $num-word operands.
#
# Arguments:
#   $name  name of the generated function
#   $num   number of words in each operand
#
# The generated code reads a[] through its second argument and b[] through
# its third; result words are written through the first argument.  There
# are 2*$num-1 columns.  The low accumulator word of each column is stored
# by the final mul_add_c step of that column, and the last remaining word
# is stored after the loop.
#
# All working variables are lexical, so nothing leaks into package globals.
# Helper calls keep the "&" sigil on purpose: names such as push, pop and
# xor would otherwise be parsed as Perl built-ins/operators.
sub bn_mul_comba {
    my ($name, $num) = @_;

    # Three-word accumulator (rotated after every finished column) and the
    # registers holding the a[] and b[] pointers.
    my ($c0, $c1, $c2) = ("ebx", "ecx", "ebp");
    my ($ap, $bp)      = ("esi", "edi");

    # $as: a[] index the current column starts at (counting down from there)
    # $bs: b[] index the current column starts at (counting up from there)
    # $be: b[] index the current column ends at
    my ($as, $bs, $be) = (0, 0, 0);
    my $tot = $num + $num - 1;
    my $i;    # declared outside the loop: its final value is used afterwards

    &function_begin_B($name, "");

    &push("esi");
    &mov($ap, &wparam(1));
    &push("edi");
    &mov($bp, &wparam(2));
    &push("ebp");
    &push("ebx");

    &xor($c0, $c0);
    &mov("eax", &DWP(0, $ap, "", 0));    # load the first word of a[]
    &xor($c1, $c1);
    &mov("edx", &DWP(0, $bp, "", 0));    # load the first word of b[]

    for ($i = 0; $i < $tot; $i++) {
        # 1 while the next column still starts one a[] word further on,
        # 0 once the column start moves along b[] instead.
        my $growing = ($i < ($num - 1)) ? 1 : 0;

        &comment("################## Calculate word $i");

        my ($ai, $bi) = ($as, $bs);
        for (my $j = $bs; $j <= $be; $j++, $ai--, $bi++) {
            &xor($c2, $c2) if ($j == $bs);

            # Default: another product follows in this column.
            my $v = 0;
            my ($na, $nb) = ($ai - 1, $bi + 1);
            if ($j == $be) {
                # Last product of the column: have mul_add_c store the word
                # (2 marks the very last column) and preload the operands
                # that start the next column.
                $v = (($i + 1) == $tot) ? 2 : 1;
                ($na, $nb) = ($as + $growing, $bs + 1 - $growing);
            }

            &mul_add_c($ap, $ai, $bp, $bi, $c0, $c1, $c2, $v, $i, $na, $nb);

            if ($v) {
                &comment("saved r[$i]");
                # The stored word drops out; the old $c0 register becomes
                # the new top word (it is cleared at the next column start).
                ($c0, $c1, $c2) = ($c1, $c2, $c0);
            }
        }

        $as += $growing;
        $be += $growing;
        $bs += 1 - $growing;
    }

    &comment("save r[$i]");
    # eax still holds the result pointer loaded by the last mul_add_c step
    # (the step run with $v == 2 does not reload eax).
    &mov(&DWP($i * 4, "eax", "", 0), $c0);

    &pop("ebx");
    &pop("ebp");
    &pop("edi");
    &pop("esi");
    &ret();
    &function_end_B($name);
}
205 | ||
# Emit the function $name: a fully unrolled column-by-column (comba)
# squaring of a $num-word operand.
#
# Arguments:
#   $name  name of the generated function
#   $num   number of words in the operand
#
# The generated code writes result words through its first argument and
# reads a[] through its second.  Within each column only the products with
# a-index >= b-index are emitted.  A product of two different words goes
# through sqr_add_c2, which adds it twice; a product of a word with itself
# goes through sqr_add_c.
#
# The register variables are no longer initialised from @_ (the original
# copied the function name and word count into them before overwriting),
# and all working variables are lexical.  Helper calls keep the "&" sigil
# on purpose: names such as push, pop and xor would otherwise be parsed as
# Perl built-ins/operators.
sub bn_sqr_comba {
    my ($name, $num) = @_;

    # Three-word accumulator (rotated after every finished column), the
    # a[] pointer and the result pointer.
    my ($c0, $c1, $c2) = ("ebx", "ecx", "ebp");
    my $ap = "esi";
    my $r  = "edi";

    # $as: a-index the current column starts at (counting down from there)
    # $bs: second index the current column starts at (counting up)
    # $be: upper bound for the second index in the current column
    my ($as, $bs, $be) = (0, 0, 0);
    my $tot = $num + $num - 1;
    my $i;    # declared outside the loop: its final value is used afterwards

    &function_begin_B($name, "");

    &push("esi");
    &push("edi");
    &push("ebp");
    &push("ebx");
    &mov($r, &wparam(0));
    &mov($ap, &wparam(1));
    &xor($c0, $c0);
    &xor($c1, $c1);
    &mov("eax", &DWP(0, $ap, "", 0));    # load the first word

    for ($i = 0; $i < $tot; $i++) {
        # 1 while the next column still starts one a[] word further on,
        # 0 once the column start moves along the second index instead.
        my $growing = ($i < ($num - 1)) ? 1 : 0;

        &comment("############### Calculate word $i");

        my ($ai, $bi) = ($as, $bs);
        for (my $j = $bs; $j <= $be; $j++, $ai--, $bi++) {
            &xor($c2, $c2) if ($j == $bs);

            # Default: another product follows in this column.
            my $v = 0;
            my ($na, $nb) = ($ai - 1, $bi + 1);
            if ($na < $nb) {
                # The indices would cross on the next step, so this is the
                # last product of the column: have the helper store the
                # word (2 marks the very last column) and preload the
                # operands that start the next column.
                $v = (($i + 1) == $tot) ? 2 : 1;
                ($na, $nb) = ($as + $growing, $bs + 1 - $growing);
            }

            if ($ai == $bi) {
                &sqr_add_c($r, $ap, $ai, $bi,
                           $c0, $c1, $c2, $v, $i, $na, $nb);
            }
            else {
                &sqr_add_c2($r, $ap, $ai, $bi,
                            $c0, $c1, $c2, $v, $i, $na, $nb);
            }

            if ($v) {
                &comment("saved r[$i]");
                # The stored word drops out; the old $c0 register becomes
                # the new top word (it is cleared at the next column start).
                ($c0, $c1, $c2) = ($c1, $c2, $c0);
                last;
            }
        }

        $as += $growing;
        $be += $growing;
        $bs += 1 - $growing;
    }

    # Store the final, most significant result word.
    &mov(&DWP($i * 4, $r, "", 0), $c0);
    &pop("ebx");
    &pop("ebp");
    &pop("edi");
    &pop("esi");
    &ret();
    &function_end_B($name);
}