git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/x86_64cpuid.pl
Replace "SSLeay" in API with OpenSSL
[thirdparty/openssl.git] / crypto / x86_64cpuid.pl
#!/usr/bin/env perl
#
# Perlasm generator for the x86_64 CPU-identification and utility routines
# (OPENSSL_ia32_cpuid, OPENSSL_cleanse, ...).
#
# Usage: x86_64cpuid.pl [flavour] [output]
# Everything printed after the pipe below is fed to x86_64-xlate.pl together
# with the flavour and output arguments.

$flavour = shift;
$output  = shift;
# A first argument containing a dot is taken as the output name, not a flavour.
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 argument registers are selected by flavour or by an ".asm" output name.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for the translator next to this script, then in its perlasm/ subdirectory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Redirect STDOUT into the translator.  Abort if the pipe cannot be started,
# otherwise all later prints would silently go nowhere.
open OUT,"| \"$^X\" $xlate $flavour $output"
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Registers carrying the first four integer arguments for the chosen ABI.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order
19
20 print<<___;
# Place a call to OPENSSL_cpuid_setup in the .init section (presumably so it
# runs during image initialisation - confirm for the target platform) and
# reserve the 16-byte OPENSSL_ia32cap_P object as hidden common storage.
.extern		OPENSSL_cpuid_setup
.hidden		OPENSSL_cpuid_setup
.section	.init
	call	OPENSSL_cpuid_setup

.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P,16,4

# All routines below are emitted into .text.
.text
30
# OPENSSL_atomic_add: atomically add the 32-bit amount in the second
# argument to the 32-bit word addressed by the first argument.
# Returns the updated value in %rax, sign-extended to 64 bits.
.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
	movl	($arg1),%eax		# snapshot of the current value
.Latomic_retry:
	leaq	($arg2,%rax),%r8	# candidate = snapshot + amount
	.byte	0xf0			# lock
	cmpxchgl	%r8d,($arg1)	# store candidate if memory still equals %eax
	jne	.Latomic_retry		# word changed underneath us; %eax now holds it
	movl	%r8d,%eax		# result is the value just stored
	.byte	0x48,0x98		# cltq/cdqe
	ret
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add
44
# OPENSSL_rdtsc: return the 64-bit time-stamp counter in %rax.
# Takes no arguments; modifies %rdx.
.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,\@abi-omnipotent
.align	16
OPENSSL_rdtsc:
	rdtsc				# counter arrives split as %edx:%eax
	shl	\$32,%rdx		# move the high half into bits 63..32
	or	%rdx,%rax		# %rax = high<<32 | low
	ret
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc
54
# OPENSSL_ia32_cpuid: query CPUID and return an adjusted capability word.
#
# In:   %rdi addresses a buffer; only the 32-bit word at offset 8 is
#       written.  It receives EBX of CPUID leaf 7 sub-leaf 0, or stays 0
#       when that leaf is not queried.
#       NOTE(review): the code uses %rdi directly; presumably the
#       "function,1" type annotation makes the translator deliver the
#       argument there on Win64 as well - confirm against x86_64-xlate.pl.
# Out:  %rax = (adjusted leaf-1 ECX) << 32 | (adjusted leaf-1 EDX)
# Regs: %rbx is parked in %r8 and restored before returning;
#       %rcx, %rdx, %r9, %r10 and %r11 are used as scratch.
#
# Register roles inside the routine:
#       %r11d  highest standard CPUID leaf
#       %r9d   0 while the vendor is Intel; later the ECX accumulator
#       %r10d  core-count scratch; finally the copy of EDX
.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@function,1
.align	16
OPENSSL_ia32_cpuid:
	mov	%rbx,%r8		# cpuid overwrites %rbx, keep a copy

	xor	%eax,%eax
	mov	%eax,8(%rdi)		# third output word starts out as 0
	cpuid				# leaf 0: max leaf + vendor string
	mov	%eax,%r11d		# highest standard leaf

	# Vendor test 1: "GenuineIntel".  Each mismatch contributes a 1.
	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# %r9d == 0 means Intel
	jz	.Lstd_leaves

	# Vendor test 2: "AuthenticAMD", same scheme with %r10d.
	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# %r10d == 0 means AMD
	jnz	.Lstd_leaves		# neither vendor: take the common path

	# AMD only: consult the extended leaves.
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000001,%eax
	jb	.Lstd_leaves		# no extended feature leaf
	mov	%eax,%r10d		# highest extended leaf
	mov	\$0x80000001,%eax
	cpuid
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# keep AMD XOP bit (1<<11) and bit 0

	cmp	\$0x80000008,%r10d
	jb	.Lstd_leaves

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$28,%edx		# hyper-threading bit set?
	jnc	.Lmerge_caps
	shr	\$16,%ebx		# %bl = number of logical processors
	cmp	%r10b,%bl
	ja	.Lmerge_caps		# more logical processors than cores: keep bit
	and	\$0xefffffff,%edx	# ~(1<<28)
	jmp	.Lmerge_caps

.Lstd_leaves:
	cmp	\$4,%r11d
	mov	\$-1,%r10d		# default when leaf 4 is absent (mov keeps flags)
	jb	.Lleaf1

	mov	\$4,%eax
	mov	\$0,%ecx		# sub-leaf 0: L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores - 1 per L1D

	cmp	\$7,%r11d
	jb	.Lleaf1

	mov	\$7,%eax
	xor	%ecx,%ecx
	cpuid
	mov	%ebx,8(%rdi)		# publish leaf-7 EBX in the third word

.Lleaf1:
	mov	\$1,%eax
	cpuid
	and	\$0xbfefffff,%edx	# force reserved bits 30 and 20 to 0
	cmp	\$0,%r9d
	jne	.Lcheck_htt		# not Intel
	or	\$0x40000000,%edx	# reserved bit#30 marks Intel CPUs
	and	\$15,%ah
	cmp	\$15,%ah		# Family ID == 15?
	jne	.Lcheck_htt
	or	\$0x00100000,%edx	# reserved bit#20 engages RC4_CHAR
.Lcheck_htt:
	bt	\$28,%edx		# hyper-threading bit set?
	jnc	.Lmerge_caps
	and	\$0xefffffff,%edx	# ~(1<<28), provisionally cleared
	cmp	\$0,%r10d
	je	.Lmerge_caps		# one core per L1D: leave it cleared

	or	\$0x10000000,%edx	# 1<<28, set again
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lmerge_caps
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lmerge_caps:
	and	\$0x00000800,%r9d	# only the AMD XOP flag survives in %r9d
	and	\$0xfffff7ff,%ecx	# drop bit 11 from leaf-1 ECX
	or	%ecx,%r9d		# %r9d = ECX with the AMD XOP flag merged in

	mov	%edx,%r10d		# %r9d:%r10d is now the copy of %ecx:%edx
	bt	\$27,%r9d		# OSXSAVE bit
	jnc	.Lno_ymm_state
	xor	%ecx,%ecx		# select XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$6,%eax		# XMM and YMM state bits
	cmp	\$6,%eax
	je	.Lcpuid_ret		# both enabled: keep the AVX-class flags
.Lno_ymm_state:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
	andl	\$0xffffffdf,8(%rdi)	# clear AVX2, ~(1<<5)
.Lcpuid_ret:
	shl	\$32,%r9		# ECX copy goes to the upper half
	mov	%r10d,%eax		# EDX copy goes to the lower half
	mov	%r8,%rbx		# restore %rbx
	or	%r9,%rax
	ret
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
185
# OPENSSL_cleanse: overwrite a buffer with zero bytes.
# First argument: start address.  Second argument: length in bytes.
# Buffers of 15 bytes or more are first advanced byte-wise to an 8-byte
# boundary and then cleared with 8-byte stores; shorter buffers and the
# final tail are cleared one byte at a time.
.globl	OPENSSL_cleanse
.type	OPENSSL_cleanse,\@abi-omnipotent
.align	16
OPENSSL_cleanse:
	xor	%rax,%rax		# the zero that every store writes
	cmp	\$15,$arg2
	jae	.Lcleanse_big
	cmp	\$0,$arg2
	je	.Lcleanse_ret		# empty buffer
.Lcleanse_bytes:
	mov	%al,($arg1)
	sub	\$1,$arg2
	lea	1($arg1),$arg1		# lea keeps the flags produced by sub
	jnz	.Lcleanse_bytes
.Lcleanse_ret:
	ret
.align	16
.Lcleanse_big:
	test	\$7,$arg1
	jz	.Lcleanse_words		# address is 8-byte aligned
	mov	%al,($arg1)
	lea	-1($arg2),$arg2
	lea	1($arg1),$arg1
	jmp	.Lcleanse_big
.Lcleanse_words:
	mov	%rax,($arg1)
	lea	-8($arg2),$arg2
	test	\$-8,$arg2		# non-zero while 8 or more bytes remain
	lea	8($arg1),$arg1		# lea keeps the flags produced by test
	jnz	.Lcleanse_words
	cmp	\$0,$arg2
	jne	.Lcleanse_bytes		# clear the remaining tail byte-wise
	ret
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
220 ___
221
# Non-Win64 flavour of OPENSSL_wipe_cpu: zero %xmm0-%xmm15 and the integer
# registers %rcx, %rdx, %rsi, %rdi, %r8-%r11, then return %rsp+8 in %rax.
unless ($win64) {
    # One zeroing instruction per register, in the order they are emitted.
    my $wipe = join("\n",
	map("\tpxor\t%xmm$_,%xmm$_", 0..15),
	map("\txorq\t%$_,%$_", qw(rcx rdx rsi rdi r8 r9 r10 r11)));

    print <<___;
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
$wipe
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
}
# Win64 flavour of OPENSSL_wipe_cpu: zero %xmm0-%xmm5 and the integer
# registers %rcx, %rdx, %r8-%r11, then return %rsp+8 in %rax.
# %xmm6-%xmm15, %rsi and %rdi are left alone (callee-saved in the
# Microsoft x64 convention).
if ($win64) {
    # One zeroing instruction per register, in the order they are emitted.
    my $wipe = join("\n",
	map("\tpxor\t%xmm$_,%xmm$_", 0..5),
	map("\txorq\t%$_,%$_", qw(rcx rdx r8 r9 r10 r11)));

    print <<___;
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
$wipe
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
}
{
# Register roles shared by both bus-instrumentation routines.
my $out="%r10";		# cursor into the 32-bit output array
my $cnt="%rcx";		# element count
my $max="%r11";		# return value (bus) / iteration limit (bus2)
my $lasttick="%r8d";	# low 32 bits of the previous rdtsc reading
my $lastdiff="%r9d";	# previous tick difference
# Offset from %rsp of a scratch slot that needs no stack adjustment:
# the first home (shadow) slot above the return address on Win64, the red
# zone just below %rsp elsewhere.  This must test \$win64; the bareword
# "win64" is a true string and always selected 8, which on non-Win64
# targets made OPENSSL_instrument_bus2 overwrite the caller's stack frame.
my $redzone=$win64?8:-8;

print<<___;
# OPENSSL_instrument_bus: first argument is an array of 32-bit words, the
# second its element count.  For every element the difference between
# consecutive rdtsc readings is added to it with a locked add, after the
# cache line has been flushed.  Returns the second argument in %rax.
.globl	OPENSSL_instrument_bus
.type	OPENSSL_instrument_bus,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus:
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg2,$max

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0
	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)
	jmp	.Loop
.align	16
.Loop:	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff = tick - lasttick
	mov	%edx,$lasttick	# lasttick = tick
	mov	%eax,$lastdiff	# lastdiff = diff
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)	# accumulate diff
	lea	4($out),$out	# next element; lea keeps flags untouched
	sub	\$1,$cnt
	jnz	.Loop

	mov	$max,%rax
	ret
.size	OPENSSL_instrument_bus,.-OPENSSL_instrument_bus

# OPENSSL_instrument_bus2: first argument is an array of 32-bit words, the
# second its element count, the third an upper bound on loop iterations.
# Tick differences are accumulated into the current element; the cursor
# only advances (and the count only drops) when a difference is not equal
# to the previous one.  Returns the number of elements advanced past,
# i.e. the original count minus the remaining count.
.globl	OPENSSL_instrument_bus2
.type	OPENSSL_instrument_bus2,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus2:
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg3,$max
	mov	$cnt,$redzone(%rsp)	# remember the original count

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0

	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)

	rdtsc			# collect 1st diff
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	mov	%eax,$lastdiff	# lastdiff = diff
.Loop2:
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)	# accumulate diff

	sub	\$1,$max
	jz	.Ldone2		# iteration limit reached

	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	cmp	$lastdiff,%eax
	mov	%eax,$lastdiff	# lastdiff = diff
	mov	\$0,%edx	# mov keeps the flags from cmp
	setne	%dl		# 1 if diff changed, else 0
	sub	%rdx,$cnt	# conditional --$cnt
	lea	($out,%rdx,4),$out	# conditional ++$out
	jnz	.Loop2		# flags still come from the sub above

.Ldone2:
	mov	$redzone(%rsp),%rax
	sub	$cnt,%rax	# original count - remaining count
	ret
.size	OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
___
}
366
# OPENSSL_ia32_rdrand and OPENSSL_ia32_rdseed are the same routine built
# around a different instruction, so both are emitted from one template.
# Each one tries the instruction up to eight times and returns the 64-bit
# value in %rax.  When %rax ends up zero, %rcx is returned instead (which
# is 0 once the retry counter has run out).
foreach my $insn ("rdrand","rdseed") {
print<<___;
.globl	OPENSSL_ia32_${insn}
.type	OPENSSL_ia32_${insn},\@abi-omnipotent
.align	16
OPENSSL_ia32_${insn}:
	mov	\$8,%ecx		# retry budget
.Loop_${insn}:
	${insn}	%rax
	jc	.Lbreak_${insn}		# carry set: a value was delivered
	loop	.Loop_${insn}		# decrement %rcx, retry while non-zero
.Lbreak_${insn}:
	cmp	\$0,%rax
	cmove	%rcx,%rax		# zero value: hand back the counter instead
	ret
.size	OPENSSL_ia32_${insn},.-OPENSSL_ia32_${insn}

___
}
398
# STDOUT is the pipe into x86_64-xlate.pl.  Closing it flushes the generated
# assembly and waits for the translator; a false return means the write or
# the translator itself failed, so do not let that pass silently.
close STDOUT or die "error closing STDOUT: $! (child status $?)";