git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/x86_64cpuid.pl
Add missing EVP_PKEY_METHOD accessors for digestsign and digestverify
[thirdparty/openssl.git] / crypto / x86_64cpuid.pl
1 #! /usr/bin/env perl
2 # Copyright 2005-2018 The OpenSSL Project Authors. All Rights Reserved.
3 #
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
8
9
# Command-line handling and output plumbing for this perlasm generator.
# Everything later printed to STDOUT is piped through x86_64-xlate.pl, which
# receives $flavour and $output on its own command line.
10 # $output is the last argument if it looks like a file (it has an extension)
11 # $flavour is the first argument if it doesn't look like a file
12 $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
13 $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
14
# Win64 conventions are selected for nasm/masm/mingw64 flavours, or when the
# output file name ends in .asm.
15 $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
16
# Locate the translator: first in the directory of this script ($dir is the
# directory part of $0), then in its perlasm/ subdirectory.
17 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
18 ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
19 ( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
20 die "can't locate x86_64-xlate.pl";
21
# Start the translator under the same perl interpreter ($^X) and alias STDOUT
# to the pipe, so plain print statements feed it.
22 open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
23 or die "can't call $xlate: $!";
24 *STDOUT=*OUT;
25
# Registers that carry the first four integer arguments under the selected
# calling convention; interpolated into the assembly text below.
26 ($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order
27 ("%rdi","%rsi","%rdx","%rcx"); # Unix order
28
# Emit the part of the module that is common to all targets.  The
# here-document is interpolated: $arg1..$arg3 expand to the argument registers
# of the selected ABI, while \$ and \@ yield a literal '$' and '@' in the text
# handed to the translator.  Contents, in order:
#   - an .init section entry that calls OPENSSL_cpuid_setup, and the 16-byte
#     common symbol OPENSSL_ia32cap_P;
#   - OPENSSL_atomic_add: lock cmpxchg loop that adds $arg2 to the 32-bit word
#     at ($arg1) and returns the new value sign-extended to 64 bits;
#   - OPENSSL_rdtsc: returns the time-stamp counter as %rdx<<32 | %rax;
#   - OPENSSL_ia32_cpuid: queries CPUID, applies vendor/model specific
#     adjustments to the feature words, stores the leaf-7 %ebx/%ecx words at
#     offsets 8 and 12 of its argument, and returns the adjusted leaf-1 words
#     as (%ecx copy)<<32 | (%edx copy); AVX/AVX512 related bits are cleared
#     when xgetbv does not report the matching register state as enabled;
#   - OPENSSL_cleanse: zero-fills $arg2 bytes at $arg1, byte-wise below 15
#     bytes, otherwise byte-wise up to 8-byte alignment and then in quadwords;
#   - CRYPTO_memcmp: ORs together the XOR of every byte pair (with a dedicated
#     path for a length of exactly 16) and returns 0 if the buffers match,
#     1 otherwise; a zero length returns 0.
29 print<<___;
30 .extern OPENSSL_cpuid_setup
31 .hidden OPENSSL_cpuid_setup
32 .section .init
33 call OPENSSL_cpuid_setup
34
35 .hidden OPENSSL_ia32cap_P
36 .comm OPENSSL_ia32cap_P,16,4
37
38 .text
39
40 .globl OPENSSL_atomic_add
41 .type OPENSSL_atomic_add,\@abi-omnipotent
42 .align 16
43 OPENSSL_atomic_add:
44 movl ($arg1),%eax
45 .Lspin: leaq ($arg2,%rax),%r8
46 .byte 0xf0 # lock
47 cmpxchgl %r8d,($arg1)
48 jne .Lspin
49 movl %r8d,%eax
50 .byte 0x48,0x98 # cltq/cdqe
51 ret
52 .size OPENSSL_atomic_add,.-OPENSSL_atomic_add
53
54 .globl OPENSSL_rdtsc
55 .type OPENSSL_rdtsc,\@abi-omnipotent
56 .align 16
57 OPENSSL_rdtsc:
58 rdtsc
59 shl \$32,%rdx
60 or %rdx,%rax
61 ret
62 .size OPENSSL_rdtsc,.-OPENSSL_rdtsc
63
64 .globl OPENSSL_ia32_cpuid
65 .type OPENSSL_ia32_cpuid,\@function,1
66 .align 16
67 OPENSSL_ia32_cpuid:
68 .cfi_startproc
69 mov %rbx,%r8 # save %rbx
70 .cfi_register %rbx,%r8
71
72 xor %eax,%eax
73 mov %rax,8(%rdi) # clear extended feature flags
74 cpuid
75 mov %eax,%r11d # max value for standard query level
76
77 xor %eax,%eax
78 cmp \$0x756e6547,%ebx # "Genu"
79 setne %al
80 mov %eax,%r9d
81 cmp \$0x49656e69,%edx # "ineI"
82 setne %al
83 or %eax,%r9d
84 cmp \$0x6c65746e,%ecx # "ntel"
85 setne %al
86 or %eax,%r9d # 0 indicates Intel CPU
87 jz .Lintel
88
89 cmp \$0x68747541,%ebx # "Auth"
90 setne %al
91 mov %eax,%r10d
92 cmp \$0x69746E65,%edx # "enti"
93 setne %al
94 or %eax,%r10d
95 cmp \$0x444D4163,%ecx # "cAMD"
96 setne %al
97 or %eax,%r10d # 0 indicates AMD CPU
98 jnz .Lintel
99
100 # AMD specific
101 mov \$0x80000000,%eax
102 cpuid
103 cmp \$0x80000001,%eax
104 jb .Lintel
105 mov %eax,%r10d
106 mov \$0x80000001,%eax
107 cpuid
108 or %ecx,%r9d
109 and \$0x00000801,%r9d # isolate AMD XOP bit, 1<<11
110
111 cmp \$0x80000008,%r10d
112 jb .Lintel
113
114 mov \$0x80000008,%eax
115 cpuid
116 movzb %cl,%r10 # number of cores - 1
117 inc %r10 # number of cores
118
119 mov \$1,%eax
120 cpuid
121 bt \$28,%edx # test hyper-threading bit
122 jnc .Lgeneric
123 shr \$16,%ebx # number of logical processors
124 cmp %r10b,%bl
125 ja .Lgeneric
126 and \$0xefffffff,%edx # ~(1<<28)
127 jmp .Lgeneric
128
129 .Lintel:
130 cmp \$4,%r11d
131 mov \$-1,%r10d
132 jb .Lnocacheinfo
133
134 mov \$4,%eax
135 mov \$0,%ecx # query L1D
136 cpuid
137 mov %eax,%r10d
138 shr \$14,%r10d
139 and \$0xfff,%r10d # number of cores -1 per L1D
140
141 .Lnocacheinfo:
142 mov \$1,%eax
143 cpuid
144 movd %eax,%xmm0 # put aside processor id
145 and \$0xbfefffff,%edx # force reserved bits to 0
146 cmp \$0,%r9d
147 jne .Lnotintel
148 or \$0x40000000,%edx # set reserved bit#30 on Intel CPUs
149 and \$15,%ah
150 cmp \$15,%ah # examine Family ID
151 jne .LnotP4
152 or \$0x00100000,%edx # set reserved bit#20 to engage RC4_CHAR
153 .LnotP4:
154 cmp \$6,%ah
155 jne .Lnotintel
156 and \$0x0fff0ff0,%eax
157 cmp \$0x00050670,%eax # Knights Landing
158 je .Lknights
159 cmp \$0x00080650,%eax # Knights Mill (according to sde)
160 jne .Lnotintel
161 .Lknights:
162 and \$0xfbffffff,%ecx # clear XSAVE flag to mimic Silvermont
163
164 .Lnotintel:
165 bt \$28,%edx # test hyper-threading bit
166 jnc .Lgeneric
167 and \$0xefffffff,%edx # ~(1<<28)
168 cmp \$0,%r10d
169 je .Lgeneric
170
171 or \$0x10000000,%edx # 1<<28
172 shr \$16,%ebx
173 cmp \$1,%bl # see if cache is shared
174 ja .Lgeneric
175 and \$0xefffffff,%edx # ~(1<<28)
176 .Lgeneric:
177 and \$0x00000800,%r9d # isolate AMD XOP flag
178 and \$0xfffff7ff,%ecx
179 or %ecx,%r9d # merge AMD XOP flag
180
181 mov %edx,%r10d # %r9d:%r10d is copy of %ecx:%edx
182
183 cmp \$7,%r11d
184 jb .Lno_extended_info
185 mov \$7,%eax
186 xor %ecx,%ecx
187 cpuid
188 bt \$26,%r9d # check XSAVE bit, cleared on Knights
189 jc .Lnotknights
190 and \$0xfff7ffff,%ebx # clear ADCX/ADOX flag
191 .Lnotknights:
192 movd %xmm0,%eax # restore processor id
193 and \$0x0fff0ff0,%eax
194 cmp \$0x00050650,%eax # Skylake-X
195 jne .Lnotskylakex
196 and \$0xfffeffff,%ebx # ~(1<<16)
197 # suppress AVX512F flag on Skylake-X
198 .Lnotskylakex:
199 mov %ebx,8(%rdi) # save extended feature flags
200 mov %ecx,12(%rdi)
201 .Lno_extended_info:
202
203 bt \$27,%r9d # check OSXSAVE bit
204 jnc .Lclear_avx
205 xor %ecx,%ecx # XCR0
206 .byte 0x0f,0x01,0xd0 # xgetbv
207 and \$0xe6,%eax # isolate XMM, YMM and ZMM state support
208 cmp \$0xe6,%eax
209 je .Ldone
210 andl \$0x3fdeffff,8(%rdi) # ~(1<<31|1<<30|1<<21|1<<16)
211 # clear AVX512F+BW+VL+FIMA, all of
212 # them are EVEX-encoded, which requires
213 # ZMM state support even if one uses
214 # only XMM and YMM :-(
215 and \$6,%eax # isolate XMM and YMM state support
216 cmp \$6,%eax
217 je .Ldone
218 .Lclear_avx:
219 mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11)
220 and %eax,%r9d # clear AVX, FMA and AMD XOP bits
221 mov \$0x3fdeffdf,%eax # ~(1<<31|1<<30|1<<21|1<<16|1<<5)
222 and %eax,8(%rdi) # clear AVX2 and AVX512* bits
223 .Ldone:
224 shl \$32,%r9
225 mov %r10d,%eax
226 mov %r8,%rbx # restore %rbx
227 .cfi_restore %rbx
228 or %r9,%rax
229 ret
230 .cfi_endproc
231 .size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
232
233 .globl OPENSSL_cleanse
234 .type OPENSSL_cleanse,\@abi-omnipotent
235 .align 16
236 OPENSSL_cleanse:
237 xor %rax,%rax
238 cmp \$15,$arg2
239 jae .Lot
240 cmp \$0,$arg2
241 je .Lret
242 .Little:
243 mov %al,($arg1)
244 sub \$1,$arg2
245 lea 1($arg1),$arg1
246 jnz .Little
247 .Lret:
248 ret
249 .align 16
250 .Lot:
251 test \$7,$arg1
252 jz .Laligned
253 mov %al,($arg1)
254 lea -1($arg2),$arg2
255 lea 1($arg1),$arg1
256 jmp .Lot
257 .Laligned:
258 mov %rax,($arg1)
259 lea -8($arg2),$arg2
260 test \$-8,$arg2
261 lea 8($arg1),$arg1
262 jnz .Laligned
263 cmp \$0,$arg2
264 jne .Little
265 ret
266 .size OPENSSL_cleanse,.-OPENSSL_cleanse
267
268 .globl CRYPTO_memcmp
269 .type CRYPTO_memcmp,\@abi-omnipotent
270 .align 16
271 CRYPTO_memcmp:
272 xor %rax,%rax
273 xor %r10,%r10
274 cmp \$0,$arg3
275 je .Lno_data
276 cmp \$16,$arg3
277 jne .Loop_cmp
278 mov ($arg1),%r10
279 mov 8($arg1),%r11
280 mov \$1,$arg3
281 xor ($arg2),%r10
282 xor 8($arg2),%r11
283 or %r11,%r10
284 cmovnz $arg3,%rax
285 ret
286
287 .align 16
288 .Loop_cmp:
289 mov ($arg1),%r10b
290 lea 1($arg1),$arg1
291 xor ($arg2),%r10b
292 lea 1($arg2),$arg2
293 or %r10b,%al
294 dec $arg3
295 jnz .Loop_cmp
296 neg %rax
297 shr \$63,%rax
298 .Lno_data:
299 ret
300 .size CRYPTO_memcmp,.-CRYPTO_memcmp
301 ___
302
# Non-Win64 variant of OPENSSL_wipe_cpu, emitted only when $win64 is false.
# It zeroes %xmm0-%xmm15 and the integer registers %rcx, %rdx, %rsi, %rdi and
# %r8-%r11, then returns %rsp+8 in %rax.
303 print<<___ if (!$win64);
304 .globl OPENSSL_wipe_cpu
305 .type OPENSSL_wipe_cpu,\@abi-omnipotent
306 .align 16
307 OPENSSL_wipe_cpu:
308 pxor %xmm0,%xmm0
309 pxor %xmm1,%xmm1
310 pxor %xmm2,%xmm2
311 pxor %xmm3,%xmm3
312 pxor %xmm4,%xmm4
313 pxor %xmm5,%xmm5
314 pxor %xmm6,%xmm6
315 pxor %xmm7,%xmm7
316 pxor %xmm8,%xmm8
317 pxor %xmm9,%xmm9
318 pxor %xmm10,%xmm10
319 pxor %xmm11,%xmm11
320 pxor %xmm12,%xmm12
321 pxor %xmm13,%xmm13
322 pxor %xmm14,%xmm14
323 pxor %xmm15,%xmm15
324 xorq %rcx,%rcx
325 xorq %rdx,%rdx
326 xorq %rsi,%rsi
327 xorq %rdi,%rdi
328 xorq %r8,%r8
329 xorq %r9,%r9
330 xorq %r10,%r10
331 xorq %r11,%r11
332 leaq 8(%rsp),%rax
333 ret
334 .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
335 ___
# Win64 variant of OPENSSL_wipe_cpu, emitted only when $win64 is true.
# It zeroes only %xmm0-%xmm5 and %rcx, %rdx, %r8-%r11, then returns %rsp+8 in
# %rax.  %xmm6-%xmm15, %rsi and %rdi are left untouched here; under the
# Microsoft x64 calling convention those are callee-saved registers.
336 print<<___ if ($win64);
337 .globl OPENSSL_wipe_cpu
338 .type OPENSSL_wipe_cpu,\@abi-omnipotent
339 .align 16
340 OPENSSL_wipe_cpu:
341 pxor %xmm0,%xmm0
342 pxor %xmm1,%xmm1
343 pxor %xmm2,%xmm2
344 pxor %xmm3,%xmm3
345 pxor %xmm4,%xmm4
346 pxor %xmm5,%xmm5
347 xorq %rcx,%rcx
348 xorq %rdx,%rdx
349 xorq %r8,%r8
350 xorq %r9,%r9
351 xorq %r10,%r10
352 xorq %r11,%r11
353 leaq 8(%rsp),%rax
354 ret
355 .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
356 ___
# Emit OPENSSL_instrument_bus and OPENSSL_instrument_bus2.
#
# OPENSSL_instrument_bus(out, cnt): for each of cnt iterations it reads the
# time-stamp counter, flushes the cache line of the current 32-bit slot with
# clflush, lock-adds the tick difference since the previous reading to that
# slot and advances to the next slot.  Returns cnt.
#
# OPENSSL_instrument_bus2(out, cnt, max): performs at most max probes, but
# advances to the next slot (and decrements cnt) only when the measured
# difference is not equal to the previous one.  Returns the original cnt minus
# what is left of it, i.e. the number of times cnt was decremented.
#
# The lexicals below name the scratch registers used by both routines; the
# incoming arguments are copied out of $arg1..$arg3 first because rdtsc
# overwrites %eax/%edx.
357 {
358 my $out="%r10";
359 my $cnt="%rcx";
360 my $max="%r11";
361 my $lasttick="%r8d";
362 my $lastdiff="%r9d";
# %rsp-relative slot where OPENSSL_instrument_bus2 parks the original cnt:
# 8(%rsp) is caller-provided home space on Win64, -8(%rsp) is the red zone on
# Unix.  This must test the variable $win64; the bareword win64 is a non-empty
# string and therefore always true, which selected 8(%rsp) on every platform
# and made Unix builds overwrite the caller's stack frame.
363 my $redzone=$win64?8:-8;
364
365 print<<___;
366 .globl OPENSSL_instrument_bus
367 .type OPENSSL_instrument_bus,\@abi-omnipotent
368 .align 16
369 OPENSSL_instrument_bus:
370 mov $arg1,$out # tribute to Win64
371 mov $arg2,$cnt
372 mov $arg2,$max
373
374 rdtsc # collect 1st tick
375 mov %eax,$lasttick # lasttick = tick
376 mov \$0,$lastdiff # lastdiff = 0
377 clflush ($out)
378 .byte 0xf0 # lock
379 add $lastdiff,($out)
380 jmp .Loop
381 .align 16
382 .Loop: rdtsc
383 mov %eax,%edx
384 sub $lasttick,%eax
385 mov %edx,$lasttick
386 mov %eax,$lastdiff
387 clflush ($out)
388 .byte 0xf0 # lock
389 add %eax,($out)
390 lea 4($out),$out
391 sub \$1,$cnt
392 jnz .Loop
393
394 mov $max,%rax
395 ret
396 .size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
397
398 .globl OPENSSL_instrument_bus2
399 .type OPENSSL_instrument_bus2,\@abi-omnipotent
400 .align 16
401 OPENSSL_instrument_bus2:
402 mov $arg1,$out # tribute to Win64
403 mov $arg2,$cnt
404 mov $arg3,$max
405 mov $cnt,$redzone(%rsp)
406
407 rdtsc # collect 1st tick
408 mov %eax,$lasttick # lasttick = tick
409 mov \$0,$lastdiff # lastdiff = 0
410
411 clflush ($out)
412 .byte 0xf0 # lock
413 add $lastdiff,($out)
414
415 rdtsc # collect 1st diff
416 mov %eax,%edx
417 sub $lasttick,%eax # diff
418 mov %edx,$lasttick # lasttick = tick
419 mov %eax,$lastdiff # lastdiff = diff
420 .Loop2:
421 clflush ($out)
422 .byte 0xf0 # lock
423 add %eax,($out) # accumulate diff
424
425 sub \$1,$max
426 jz .Ldone2
427
428 rdtsc
429 mov %eax,%edx
430 sub $lasttick,%eax # diff
431 mov %edx,$lasttick # lasttick = tick
432 cmp $lastdiff,%eax
433 mov %eax,$lastdiff # lastdiff = diff
434 mov \$0,%edx
435 setne %dl
436 sub %rdx,$cnt # conditional --$cnt
437 lea ($out,%rdx,4),$out # conditional ++$out
438 jnz .Loop2
439
440 .Ldone2:
441 mov $redzone(%rsp),%rax
442 sub $cnt,%rax
443 ret
444 .size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
445 ___
446 }
447
# gen_random($rdop)
#
# Emits the routine OPENSSL_ia32_<rdop>_bytes(buf, len), where $rdop is the
# mnemonic of the instruction used to draw 64-bit values into %r10 (the
# callers in this file pass "rdrand" and "rdseed").  All local labels carry
# $rdop in their name so the sub can be invoked more than once.
#
# Behaviour of the generated routine, as written below:
#   - returns 0 immediately when len is 0;
#   - each 64-bit draw is attempted up to 8 times (%r11 is the retry counter,
#     carry set means the draw succeeded); if all 8 attempts fail the routine
#     stops and returns what it has produced so far;
#   - full quadwords are stored while at least 8 bytes remain, a final partial
#     quadword is stored byte by byte;
#   - %rax holds the number of bytes written, and %r10 is zeroed before
#     returning.
448 sub gen_random {
449 my $rdop = shift;
450 print<<___;
451 .globl OPENSSL_ia32_${rdop}_bytes
452 .type OPENSSL_ia32_${rdop}_bytes,\@abi-omnipotent
453 .align 16
454 OPENSSL_ia32_${rdop}_bytes:
455 xor %rax, %rax # return value
456 cmp \$0,$arg2
457 je .Ldone_${rdop}_bytes
458
459 mov \$8,%r11
460 .Loop_${rdop}_bytes:
461 ${rdop} %r10
462 jc .Lbreak_${rdop}_bytes
463 dec %r11
464 jnz .Loop_${rdop}_bytes
465 jmp .Ldone_${rdop}_bytes
466
467 .align 16
468 .Lbreak_${rdop}_bytes:
469 cmp \$8,$arg2
470 jb .Ltail_${rdop}_bytes
471 mov %r10,($arg1)
472 lea 8($arg1),$arg1
473 add \$8,%rax
474 sub \$8,$arg2
475 jz .Ldone_${rdop}_bytes
476 mov \$8,%r11
477 jmp .Loop_${rdop}_bytes
478
479 .align 16
480 .Ltail_${rdop}_bytes:
481 mov %r10b,($arg1)
482 lea 1($arg1),$arg1
483 inc %rax
484 shr \$8,%r10
485 dec $arg2
486 jnz .Ltail_${rdop}_bytes
487
488 .Ldone_${rdop}_bytes:
489 xor %r10,%r10 # Clear sensitive data from register
490 ret
491 .size OPENSSL_ia32_${rdop}_bytes,.-OPENSSL_ia32_${rdop}_bytes
492 ___
493 }
# Emit OPENSSL_ia32_rdrand_bytes and OPENSSL_ia32_rdseed_bytes.
494 gen_random("rdrand");
495 gen_random("rdseed");
496
# STDOUT is the pipe into the translator.  Closing it flushes the remaining
# output and waits for the child, and returns false on a write error or when
# the child exits with a non-zero status.  Treat that as fatal so a failed
# translation does not pass silently as success.
497 close STDOUT or die "error closing STDOUT: $! (child status $?)"; # flush