]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/x86_64cpuid.pl
remove 0 assignments.
[thirdparty/openssl.git] / crypto / x86_64cpuid.pl
CommitLineData
14e21f86
AP
#!/usr/bin/env perl

# Usage: x86_64cpuid.pl [flavour] [output]
# If the first argument contains a dot it is taken to be the output file
# name and the flavour is left undefined.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 argument registers are selected for nasm/masm/mingw64 flavours or
# whenever the output file name ends in .asm.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for the translator next to this script, then in its perlasm/ subdirectory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator.
# The translator path is quoted so a directory containing spaces still works,
# and a failure to start the pipe is fatal instead of being silently ignored.
open OUT,"| \"$^X\" \"$xlate\" $flavour $output" or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Registers carrying the first four integer arguments under each convention.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order
19
aa8f38e4
AP
print<<___;
# Place a call to OPENSSL_cpuid_setup in the .init section.  The symbol is
# external to this file and marked hidden.
.extern		OPENSSL_cpuid_setup
.hidden		OPENSSL_cpuid_setup
.section	.init
	call	OPENSSL_cpuid_setup

# Capability vector: a 16-byte common symbol with 4-byte alignment, hidden.
.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P,16,4

.text
376729e1
AP
30
# OPENSSL_atomic_add
# In:   first argument  = address of a 32-bit word
#       second argument = amount to add
# Out:  rax = the updated 32-bit value, sign-extended to 64 bits
# Implemented as a compare-and-exchange retry loop; %r8 is scratch.
.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
	movl	($arg1),%eax		# eax = value currently in memory
.Latomic_add_retry:
	leaq	($arg2,%rax),%r8	# r8 = observed value + amount
	.byte	0xf0			# lock prefix for the next instruction
	cmpxchgl	%r8d,($arg1)	# store r8d only if memory still equals eax
	jne	.Latomic_add_retry	# memory changed underneath us, try again
	movl	%r8d,%eax		# return the value that was stored
	.byte	0x48,0x98		# cltq/cdqe: sign-extend eax into rax
	ret
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add
44
932cc129
AP
# OPENSSL_rdtsc
# Out:  rax = rdtsc result with the edx half shifted into bits 63..32
# Clobbers rdx.
.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,\@abi-omnipotent
.align	16
OPENSSL_rdtsc:
	rdtsc
	shl	\$32,%rdx		# move the high half into place
	or	%rdx,%rax		# rax = (rdx << 32) | rax
	ret
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc
54
# OPENSSL_ia32_cpuid
# In:   %rdi = pointer to a buffer; the 32-bit word at offset 8 is cleared
#       and later receives EBX of CPUID leaf 7 when that leaf is queried.
# Out:  %rax = (adjusted leaf-1 ECX << 32) | adjusted leaf-1 EDX
# Register roles:
#   %r8   holds the caller's %rbx while cpuid overwrites it
#   %r9d  first "not Intel" indicator, later the ECX copy with the XOP flag
#   %r10d first "not AMD" / core-count scratch, later the EDX copy
#   %r11d highest standard CPUID leaf
.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@function,1
.align	16
OPENSSL_ia32_cpuid:
	mov	%rbx,%r8		# keep %rbx, cpuid overwrites it

	xor	%eax,%eax
	mov	%eax,8(%rdi)		# zero the third output word
	cpuid				# leaf 0
	mov	%eax,%r11d		# highest standard leaf

	# Vendor check, Intel: %r9d ends up 0 only if all three words match.
	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 means Intel
	jz	.Lintel

	# Vendor check, AMD: %r10d ends up 0 only if all three words match.
	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 means AMD
	jnz	.Lintel			# neither vendor: take the common path

	# AMD only: consult the extended leaves.
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000001,%eax
	jb	.Lintel			# no extended feature leaf
	mov	%eax,%r10d		# highest extended leaf
	mov	\$0x80000001,%eax
	cpuid
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# keep XOP (1<<11) and the not-Intel bit 0

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$28,%edx		# hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# %bl = number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric		# more logical processors than cores
	and	\$0xefffffff,%edx	# otherwise clear bit 28
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d		# mov leaves the flags of cmp untouched
	jb	.Lnocacheinfo		# leaf 4 is not available

	mov	\$4,%eax
	mov	\$0,%ecx		# sub-leaf 0: L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores - 1 per L1D

	cmp	\$7,%r11d
	jb	.Lnocacheinfo		# leaf 7 is not available

	mov	\$7,%eax
	xor	%ecx,%ecx
	cpuid
	mov	%ebx,8(%rdi)		# third output word = leaf 7 EBX

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	and	\$0xbfefffff,%edx	# force reserved bits 30 and 20 to 0
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# reserved bit 30 marks Intel CPUs
	and	\$15,%ah
	cmp	\$15,%ah		# Family ID equal to 15?
	jne	.Lnotintel
	or	\$0x00100000,%edx	# reserved bit 20 engages RC4_CHAR
.Lnotintel:
	bt	\$28,%edx		# hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# clear bit 28 for now
	cmp	\$0,%r10d
	je	.Lgeneric		# zero means one core per L1D

	or	\$0x10000000,%edx	# set bit 28 again
	shr	\$16,%ebx
	cmp	\$1,%bl			# is the cache shared?
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# no: clear bit 28
.Lgeneric:
	and	\$0x00000800,%r9d	# keep only the AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# ECX with the XOP flag merged in

	mov	%edx,%r10d		# %r9d:%r10d now mirror %ecx:%edx
	bt	\$27,%r9d		# OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# select XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$6,%eax		# XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone			# both enabled: leave the AVX bits alone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# drop AVX, FMA and AMD XOP
	andl	\$0xffffffdf,8(%rdi)	# drop AVX2, ~(1<<5)
.Ldone:
	shl	\$32,%r9		# ECX copy goes to the upper half
	mov	%r10d,%eax		# EDX copy goes to the lower half
	mov	%r8,%rbx		# give %rbx back to the caller
	or	%r9,%rax
	ret
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
b2dba9bf
AP
185
# OPENSSL_cleanse
# In:   first argument  = start of the buffer
#       second argument = length in bytes
# Writes zeros over the buffer.  Lengths below 15 are cleared one byte at
# a time; longer buffers are byte-cleared up to an 8-byte boundary, then
# cleared in 8-byte stores, and any remainder is finished byte-wise.
# Clobbers rax and both argument registers.
.globl	OPENSSL_cleanse
.type	OPENSSL_cleanse,\@abi-omnipotent
.align	16
OPENSSL_cleanse:
	xor	%rax,%rax		# the value being stored
	cmp	\$15,$arg2
	jae	.Lcleanse_bulk
	cmp	\$0,$arg2
	je	.Lcleanse_ret		# zero length, nothing to store
.Lcleanse_bytes:
	mov	%al,($arg1)
	sub	\$1,$arg2
	lea	1($arg1),$arg1		# lea keeps the flags set by sub
	jnz	.Lcleanse_bytes
.Lcleanse_ret:
	ret
.align	16
.Lcleanse_bulk:
	test	\$7,$arg1		# pointer on an 8-byte boundary yet?
	jz	.Lcleanse_qwords
	mov	%al,($arg1)
	lea	-1($arg2),$arg2
	lea	1($arg1),$arg1
	jmp	.Lcleanse_bulk
.Lcleanse_qwords:
	mov	%rax,($arg1)
	lea	-8($arg2),$arg2
	test	\$-8,$arg2		# non-zero while 8 or more bytes remain
	lea	8($arg1),$arg1		# lea keeps the flags set by test
	jnz	.Lcleanse_qwords
	cmp	\$0,$arg2
	jne	.Lcleanse_bytes		# finish the last 1..7 bytes
	ret
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
___
aa8f38e4
AP
221
print<<___ if (!$win64);
# OPENSSL_wipe_cpu, variant emitted when not targeting Win64.
# Zeroes xmm0-xmm15 and rcx, rdx, rsi, rdi, r8-r11.
# Out:  rax = rsp + 8 as seen on entry.
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	# vector registers
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	pxor	%xmm10,%xmm10
	pxor	%xmm11,%xmm11
	pxor	%xmm12,%xmm12
	pxor	%xmm13,%xmm13
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	# general-purpose registers
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%rsi,%rsi
	xorq	%rdi,%rdi
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax		# return value
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
print<<___ if ($win64);
# OPENSSL_wipe_cpu, variant emitted when targeting Win64.
# Zeroes xmm0-xmm5 and rcx, rdx, r8-r11 only.
# Out:  rax = rsp + 8 as seen on entry.
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	# vector registers
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	# general-purpose registers
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax		# return value
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
5fabb88a
AP
{
# Register roles shared by OPENSSL_instrument_bus and OPENSSL_instrument_bus2.
my $out="%r10";		# cursor into the caller's array of 32-bit words
my $cnt="%rcx";		# remaining element count
my $max="%r11";		# bus: copy of the count; bus2: iteration budget
my $lasttick="%r8d";	# low 32 bits of the previous rdtsc reading
my $lastdiff="%r9d";	# previous tick difference
# Offset from %rsp of the slot where bus2 parks the original count:
# 8(%rsp), the caller-provided home space, on Win64 and -8(%rsp), inside
# the red zone, elsewhere.  This used to read "win64" without the sigil,
# a bareword that never consults the flag, so non-Win64 builds wrote to
# 8(%rsp), which lies in the caller's frame.
my $redzone=$win64?8:-8;

print<<___;
# OPENSSL_instrument_bus
# In:   first argument  = array of 32-bit words
#       second argument = number of words; the loop tests after the
#                         decrement, so it must be at least 1
# Adds successive rdtsc differences to consecutive array words, flushing
# each word's cache line before the locked add.
# Out:  rax = the count that was passed in
.globl	OPENSSL_instrument_bus
.type	OPENSSL_instrument_bus,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus:
	mov	$arg1,$out	# tribute to Win64: copy before %rcx is reused
	mov	$arg2,$cnt
	mov	$arg2,$max

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0
	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)
	jmp	.Loop
.align	16
.Loop:	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff = tick - lasttick
	mov	%edx,$lasttick	# lasttick = tick
	mov	%eax,$lastdiff	# lastdiff = diff
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)	# accumulate diff
	lea	4($out),$out	# next word
	sub	\$1,$cnt
	jnz	.Loop

	mov	$max,%rax
	ret
.size	OPENSSL_instrument_bus,.-OPENSSL_instrument_bus

# OPENSSL_instrument_bus2
# In:   first argument  = array of 32-bit words
#       second argument = number of words
#       third argument  = upper bound on the number of probes
# Like OPENSSL_instrument_bus, but the cursor advances (and the count
# drops) only when a tick difference differs from the previous one.
# Stops when the count reaches zero or the probe budget is used up.
# Out:  rax = original count minus the count left over
.globl	OPENSSL_instrument_bus2
.type	OPENSSL_instrument_bus2,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus2:
	mov	$arg1,$out	# tribute to Win64: copy before %rcx is reused
	mov	$arg2,$cnt
	mov	$arg3,$max
	mov	$cnt,$redzone(%rsp)	# park the original count

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0

	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)

	rdtsc			# collect 1st diff
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	mov	%eax,$lastdiff	# lastdiff = diff
.Loop2:
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)	# accumulate diff

	sub	\$1,$max
	jz	.Ldone2		# probe budget exhausted

	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	cmp	$lastdiff,%eax
	mov	%eax,$lastdiff	# lastdiff = diff
	mov	\$0,%edx	# mov keeps the flags of cmp for setne
	setne	%dl		# 1 if diff changed, else 0
	sub	%rdx,$cnt	# conditional --cnt
	lea	($out,%rdx,4),$out	# conditional ++out, flags untouched
	jnz	.Loop2		# tests the result of the sub above

.Ldone2:
	mov	$redzone(%rsp),%rax	# original count
	sub	$cnt,%rax	# minus what is left
	ret
.size	OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
___
}
aa8f38e4 366
301799b8
AP
print<<___;
# OPENSSL_ia32_rdrand
# Tries rdrand up to 8 times, stopping at the first attempt that sets the
# carry flag.
# Out:  rax = the value obtained; if rax is zero at that point, the
#       remaining attempt counter from rcx is returned instead.
.globl	OPENSSL_ia32_rdrand
.type	OPENSSL_ia32_rdrand,\@abi-omnipotent
.align	16
OPENSSL_ia32_rdrand:
	mov	\$8,%ecx		# attempt counter
.Lrdrand_retry:
	rdrand	%rax
	jc	.Lrdrand_done		# carry set: got a value
	loop	.Lrdrand_retry		# decrement rcx, retry while non-zero
.Lrdrand_done:
	cmp	\$0,%rax
	cmove	%rcx,%rax		# substitute rcx for a zero result
	ret
.size	OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand

# OPENSSL_ia32_rdseed
# Same structure as OPENSSL_ia32_rdrand, using the rdseed instruction.
# Out:  rax = the value obtained; if rax is zero at that point, the
#       remaining attempt counter from rcx is returned instead.
.globl	OPENSSL_ia32_rdseed
.type	OPENSSL_ia32_rdseed,\@abi-omnipotent
.align	16
OPENSSL_ia32_rdseed:
	mov	\$8,%ecx		# attempt counter
.Lrdseed_retry:
	rdseed	%rax
	jc	.Lrdseed_done		# carry set: got a value
	loop	.Lrdseed_retry		# decrement rcx, retry while non-zero
.Lrdseed_done:
	cmp	\$0,%rax
	cmove	%rcx,%rax		# substitute rcx for a zero result
	ret
.size	OPENSSL_ia32_rdseed,.-OPENSSL_ia32_rdseed
___
398
# STDOUT is the pipe to the translator: closing it flushes the output and
# waits for the child.  Report a write error or a failing translator
# instead of exiting successfully with a truncated or missing output file.
close STDOUT or die "error closing STDOUT: ".($! || "translator exited with status $?");