]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/x86_64cpuid.pl
Add missing EVP_PKEY_METHOD accessors for digestsign and digestverify
[thirdparty/openssl.git] / crypto / x86_64cpuid.pl
CommitLineData
e0a65194 1#! /usr/bin/env perl
082193ef 2# Copyright 2005-2018 The OpenSSL Project Authors. All Rights Reserved.
e0a65194 3#
0e9725bc 4# Licensed under the Apache License 2.0 (the "License"). You may not use
e0a65194
RS
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
14e21f86 9
1aa89a7a
RL
# Command-line handling and output plumbing.
#
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output  = (@ARGV && $ARGV[-1] =~ m|\.\w+$|) ? pop   : undef;
$flavour = (@ARGV && $ARGV[0]  !~ m|\.|)     ? shift : undef;

# Win64 calling convention is selected either by flavour or by an .asm output.
$win64 = ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/) ? 1 : 0;

# Locate the x86_64-xlate.pl translator next to this script or in perlasm/.
$0 =~ m/(.*[\/\\])[^\/\\]+$/;
$dir = $1;
$xlate = "${dir}x86_64-xlate.pl";
$xlate = "${dir}perlasm/x86_64-xlate.pl" unless -f $xlate;
-f $xlate or die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator.
open(OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\"")
    or die "can't call $xlate: $!";
*STDOUT = *OUT;

# Registers carrying the first four integer arguments.
if ($win64) {
    ($arg1, $arg2, $arg3, $arg4) = ("%rcx", "%rdx", "%r8", "%r9");	# Win64 order
}
else {
    ($arg1, $arg2, $arg3, $arg4) = ("%rdi", "%rsi", "%rdx", "%rcx");	# Unix order
}
28
aa8f38e4
AP
# Start-up hook: place a call to OPENSSL_cpuid_setup in the .init section and
# declare the hidden 16-byte common symbol OPENSSL_ia32cap_P.
print<<___;
.extern		OPENSSL_cpuid_setup
.hidden		OPENSSL_cpuid_setup
.section	.init
	call	OPENSSL_cpuid_setup

.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P,16,4

.text

___

# OPENSSL_atomic_add: adds the second argument to the 32-bit value the first
# argument points at, retrying a locked cmpxchg until it succeeds, and returns
# the new value sign-extended to 64 bits.
print<<___;
.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
	movl	($arg1),%eax
.Lspin:	leaq	($arg2,%rax),%r8
	.byte	0xf0		# lock
	cmpxchgl	%r8d,($arg1)
	jne	.Lspin
	movl	%r8d,%eax
	.byte	0x48,0x98	# cltq/cdqe
	ret
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add

___

# OPENSSL_rdtsc: returns the rdtsc result with %edx:%eax merged into %rax.
print<<___;
.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,\@abi-omnipotent
.align	16
OPENSSL_rdtsc:
	rdtsc
	shl	\$32,%rdx
	or	%rdx,%rax
	ret
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc

___

# OPENSSL_ia32_cpuid: runs a series of cpuid queries, stores the (filtered)
# leaf-7 %ebx/%ecx values at offsets 8 and 12 of the buffer passed in %rdi and
# returns the adjusted leaf-1 flags as %r9d (from %ecx) in the upper half and
# %r10d (from %edx) in the lower half of %rax.  %rbx is kept in %r8 meanwhile.
print<<___;
.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@function,1
.align	16
OPENSSL_ia32_cpuid:
.cfi_startproc
	mov	%rbx,%r8		# save %rbx
.cfi_register	%rbx,%r8

	xor	%eax,%eax
	mov	%rax,8(%rdi)		# clear extended feature flags
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d
	mov	\$0x80000001,%eax
	cpuid
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# isolate AMD XOP bit, 1<<11

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	movd	%eax,%xmm0		# put aside processor id
	and	\$0xbfefffff,%edx	# force reserved bits to 0
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	jne	.LnotP4
	or	\$0x00100000,%edx	# set reserved bit#20 to engage RC4_CHAR
.LnotP4:
	cmp	\$6,%ah
	jne	.Lnotintel
	and	\$0x0fff0ff0,%eax
	cmp	\$0x00050670,%eax	# Knights Landing
	je	.Lknights
	cmp	\$0x00080650,%eax	# Knights Mill (according to sde)
	jne	.Lnotintel
.Lknights:
	and	\$0xfbffffff,%ecx	# clear XSAVE flag to mimic Silvermont

.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	cmp	\$0,%r10d
	je	.Lgeneric

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx

	cmp	\$7,%r11d
	jb	.Lno_extended_info
	mov	\$7,%eax
	xor	%ecx,%ecx
	cpuid
	bt	\$26,%r9d		# check XSAVE bit, cleared on Knights
	jc	.Lnotknights
	and	\$0xfff7ffff,%ebx	# clear ADCX/ADOX flag
.Lnotknights:
	movd	%xmm0,%eax		# restore processor id
	and	\$0x0fff0ff0,%eax
	cmp	\$0x00050650,%eax	# Skylake-X
	jne	.Lnotskylakex
	and	\$0xfffeffff,%ebx	# ~(1<<16)
					# suppress AVX512F flag on Skylake-X
.Lnotskylakex:
	mov	%ebx,8(%rdi)		# save extended feature flags
	mov	%ecx,12(%rdi)
.Lno_extended_info:

	bt	\$27,%r9d		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$0xe6,%eax		# isolate XMM, YMM and ZMM state support
	cmp	\$0xe6,%eax
	je	.Ldone
	andl	\$0x3fdeffff,8(%rdi)	# ~(1<<31|1<<30|1<<21|1<<16)
					# clear AVX512F+BW+VL+FIMA, all of
					# them are EVEX-encoded, which requires
					# ZMM state support even if one uses
					# only XMM and YMM :-(
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
	mov	\$0x3fdeffdf,%eax	# ~(1<<31|1<<30|1<<21|1<<16|1<<5)
	and	%eax,8(%rdi)		# clear AVX2 and AVX512* bits
.Ldone:
	shl	\$32,%r9
	mov	%r10d,%eax
	mov	%r8,%rbx		# restore %rbx
.cfi_restore	%rbx
	or	%r9,%rax
	ret
.cfi_endproc
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

___

# OPENSSL_cleanse: stores zero over the buffer (first argument) for the given
# byte count (second argument).  Counts below 15 use a byte loop; larger ones
# store single bytes until the pointer is 8-byte aligned, then whole quadwords,
# and finish any remainder in the byte loop.
print<<___;
.globl  OPENSSL_cleanse
.type   OPENSSL_cleanse,\@abi-omnipotent
.align  16
OPENSSL_cleanse:
	xor	%rax,%rax
	cmp	\$15,$arg2
	jae	.Lot
	cmp	\$0,$arg2
	je	.Lret
.Little:
	mov	%al,($arg1)
	sub	\$1,$arg2
	lea	1($arg1),$arg1
	jnz	.Little
.Lret:
	ret
.align	16
.Lot:
	test	\$7,$arg1
	jz	.Laligned
	mov	%al,($arg1)
	lea	-1($arg2),$arg2
	lea	1($arg1),$arg1
	jmp	.Lot
.Laligned:
	mov	%rax,($arg1)
	lea	-8($arg2),$arg2
	test	\$-8,$arg2
	lea	8($arg1),$arg1
	jnz	.Laligned
	cmp	\$0,$arg2
	jne	.Little
	ret
.size	OPENSSL_cleanse,.-OPENSSL_cleanse

___

# CRYPTO_memcmp: compares two buffers (first and second argument) over the
# length in the third argument without an early exit: the XOR of every byte
# pair is OR-ed together and the result is 0 when all bytes match, 1 otherwise.
# A length of exactly 16 is handled with two quadword XORs.
print<<___;
.globl	CRYPTO_memcmp
.type	CRYPTO_memcmp,\@abi-omnipotent
.align	16
CRYPTO_memcmp:
	xor	%rax,%rax
	xor	%r10,%r10
	cmp	\$0,$arg3
	je	.Lno_data
	cmp	\$16,$arg3
	jne	.Loop_cmp
	mov	($arg1),%r10
	mov	8($arg1),%r11
	mov	\$1,$arg3
	xor	($arg2),%r10
	xor	8($arg2),%r11
	or	%r11,%r10
	cmovnz	$arg3,%rax
	ret

.align	16
.Loop_cmp:
	mov	($arg1),%r10b
	lea	1($arg1),$arg1
	xor	($arg2),%r10b
	lea	1($arg2),$arg2
	or	%r10b,%al
	dec	$arg3
	jnz	.Loop_cmp
	neg	%rax
	shr	\$63,%rax
.Lno_data:
	ret
.size	CRYPTO_memcmp,.-CRYPTO_memcmp
___
aa8f38e4
AP
302
# OPENSSL_wipe_cpu: zeroes a platform-dependent set of registers and returns
# the address 8(%rsp) in %rax.  Exactly one of the two variants is emitted.
if (!$win64) {
    # Unix variant: clears %xmm0-%xmm15 and %rcx,%rdx,%rsi,%rdi,%r8-%r11.
    print<<___;
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	pxor	%xmm10,%xmm10
	pxor	%xmm11,%xmm11
	pxor	%xmm12,%xmm12
	pxor	%xmm13,%xmm13
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%rsi,%rsi
	xorq	%rdi,%rdi
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
}
else {
    # Win64 variant: clears only %xmm0-%xmm5 and %rcx,%rdx,%r8-%r11.
    print<<___;
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
}
5fabb88a
AP
# OPENSSL_instrument_bus(out, cnt) and OPENSSL_instrument_bus2(out, cnt, max):
# sample rdtsc deltas around a clflush + locked add on the output array.
#
#  - OPENSSL_instrument_bus adds one tick difference to each of cnt 32-bit
#    slots and returns cnt.
#  - OPENSSL_instrument_bus2 performs up to max probes, advancing to the next
#    slot (and decrementing cnt) only when the tick difference changes, and
#    returns how many slots were consumed (original cnt minus what is left).
{
my $out="%r10";
my $cnt="%rcx";
my $max="%r11";
my $lasttick="%r8d";
my $lastdiff="%r9d";
# Scratch slot for the original cnt in OPENSSL_instrument_bus2.  This must test
# the $win64 variable: a bareword "win64" is an always-true string, which made
# the Unix build write to 8(%rsp), i.e. into the caller's frame.  Win64 uses
# the caller-provided home slot at 8(%rsp); elsewhere the red zone below %rsp.
my $redzone=$win64?8:-8;

print<<___;
.globl	OPENSSL_instrument_bus
.type	OPENSSL_instrument_bus,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus:
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg2,$max

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0
	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)
	jmp	.Loop
.align	16
.Loop:	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax
	mov	%edx,$lasttick
	mov	%eax,$lastdiff
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)
	lea	4($out),$out
	sub	\$1,$cnt
	jnz	.Loop

	mov	$max,%rax
	ret
.size	OPENSSL_instrument_bus,.-OPENSSL_instrument_bus

.globl	OPENSSL_instrument_bus2
.type	OPENSSL_instrument_bus2,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus2:
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg3,$max
	mov	$cnt,$redzone(%rsp)

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0

	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)

	rdtsc			# collect 1st diff
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	mov	%eax,$lastdiff	# lastdiff = diff
.Loop2:
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)	# accumulate diff

	sub	\$1,$max
	jz	.Ldone2

	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	cmp	$lastdiff,%eax
	mov	%eax,$lastdiff	# lastdiff = diff
	mov	\$0,%edx
	setne	%dl
	sub	%rdx,$cnt	# conditional --$cnt
	lea	($out,%rdx,4),$out	# conditional ++$out
	jnz	.Loop2

.Ldone2:
	mov	$redzone(%rsp),%rax
	sub	$cnt,%rax
	ret
.size	OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
___
}
aa8f38e4 447
9c940446
AP
# gen_random($rdop): emits OPENSSL_ia32_${rdop}_bytes(buf, len), where $rdop is
# the instruction mnemonic to use ("rdrand" or "rdseed" in this file).
#
# The generated routine fills buf 8 bytes at a time (byte by byte for the
# tail), retrying the instruction up to 8 times whenever it reports failure
# (carry clear) and giving up after that.  It returns the number of bytes
# actually written in %rax and clears %r10 before returning.
sub gen_random {
    my ($rdop) = @_;

    print<<___;
.globl	OPENSSL_ia32_${rdop}_bytes
.type	OPENSSL_ia32_${rdop}_bytes,\@abi-omnipotent
.align	16
OPENSSL_ia32_${rdop}_bytes:
	xor	%rax, %rax	# return value
	cmp	\$0,$arg2
	je	.Ldone_${rdop}_bytes

	mov	\$8,%r11
.Loop_${rdop}_bytes:
	${rdop}	%r10
	jc	.Lbreak_${rdop}_bytes
	dec	%r11
	jnz	.Loop_${rdop}_bytes
	jmp	.Ldone_${rdop}_bytes

.align	16
.Lbreak_${rdop}_bytes:
	cmp	\$8,$arg2
	jb	.Ltail_${rdop}_bytes
	mov	%r10,($arg1)
	lea	8($arg1),$arg1
	add	\$8,%rax
	sub	\$8,$arg2
	jz	.Ldone_${rdop}_bytes
	mov	\$8,%r11
	jmp	.Loop_${rdop}_bytes

.align	16
.Ltail_${rdop}_bytes:
	mov	%r10b,($arg1)
	lea	1($arg1),$arg1
	inc	%rax
	shr	\$8,%r10
	dec	$arg2
	jnz	.Ltail_${rdop}_bytes

.Ldone_${rdop}_bytes:
	xor	%r10,%r10	# Clear sensitive data from register
	ret
.size	OPENSSL_ia32_${rdop}_bytes,.-OPENSSL_ia32_${rdop}_bytes
___
}
# Emit the byte-filling routines for both hardware random instructions.
gen_random("rdrand");
gen_random("rdseed");

# STDOUT is a pipe to the x86_64-xlate.pl translator: closing it flushes the
# output and waits for the child.  Check the result so that a translator
# failure or a write error aborts the build instead of going unnoticed.
close STDOUT or die "error closing STDOUT: $!";