]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/x86_64cpuid.pl
x86_64cpuid.pl: allow shared build to work without -Bsymbolic.
[thirdparty/openssl.git] / crypto / x86_64cpuid.pl
1 #!/usr/bin/env perl
2
# Command line: [flavour] output
#   flavour - perlasm flavour, passed through to x86_64-xlate.pl; optional.
#   output  - output file name, passed through to x86_64-xlate.pl.
# If the first argument contains a dot it is taken to be the output file
# name and no flavour is passed on.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 argument registers are selected by flavour or by a .asm output suffix.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Directory part of this script's path, including the trailing separator.
# Falls back to the empty string (current directory) when $0 carries no
# directory component, instead of relying on whatever $1 happens to hold.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# Everything printed from here on is piped through the perlasm translator.
# Fail loudly if the pipe cannot be set up rather than emitting nothing.
open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output"
    or die "can't call ${dir}perlasm/x86_64-xlate.pl: $!";

# First four integer-argument registers for the selected calling convention.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order
14
# Flavour-independent perlasm text, printed to STDOUT (re-opened earlier in
# this script as a pipe into perlasm/x86_64-xlate.pl).  $arg1/$arg2
# interpolate to the first two integer-argument registers of the selected
# calling convention.
#
# Emitted, in order:
#   .init fragment      - a call to the hidden OPENSSL_cpuid_setup, followed
#                         by the hidden 8-byte common symbol OPENSSL_ia32cap_P.
#   OPENSSL_atomic_add  - retries a lock-prefixed cmpxchgl until the 32-bit
#                         word at ($arg1) has been replaced by itself plus
#                         $arg2; returns the new value sign-extended (cdqe).
#   OPENSSL_rdtsc       - returns the rdtsc result as one 64-bit value
#                         (%edx shifted into the upper half of %rax).
#   OPENSSL_ia32_cpuid  - keeps %rbx in %r8 across the cpuid calls.  Checks
#                         the leaf-0 vendor string for Intel and AMD; on AMD
#                         it reads extended leaves 0x80000001 (XOP bit) and
#                         0x80000008 (core count), otherwise leaf 4 when the
#                         maximum standard leaf allows it.  The result in
#                         %rax holds the (adjusted) leaf-1 %edx in bits 0-31
#                         and leaf-1 %ecx, with bit 11 replaced by the AMD XOP
#                         flag, in bits 32-63.  The AVX, FMA and XOP bits are
#                         cleared unless OSXSAVE is set and xgetbv reports
#                         both XMM and YMM state enabled.
#   OPENSSL_cleanse     - stores zero over the $arg2 bytes starting at $arg1.
#                         Lengths below 15 use a byte loop; longer ones store
#                         bytes until the pointer is 8-byte aligned, then
#                         8 bytes at a time, then finish with the byte loop.
15 print<<___;
16 .extern OPENSSL_cpuid_setup
17 .hidden OPENSSL_cpuid_setup
18 .section .init
19 call OPENSSL_cpuid_setup
20
21 .hidden OPENSSL_ia32cap_P
22 .comm OPENSSL_ia32cap_P,8
23
24 .text
25
26 .globl OPENSSL_atomic_add
27 .type OPENSSL_atomic_add,\@abi-omnipotent
28 .align 16
29 OPENSSL_atomic_add:
30 movl ($arg1),%eax
31 .Lspin: leaq ($arg2,%rax),%r8
32 .byte 0xf0 # lock
33 cmpxchgl %r8d,($arg1)
34 jne .Lspin
35 movl %r8d,%eax
36 .byte 0x48,0x98 # cltq/cdqe
37 ret
38 .size OPENSSL_atomic_add,.-OPENSSL_atomic_add
39
40 .globl OPENSSL_rdtsc
41 .type OPENSSL_rdtsc,\@abi-omnipotent
42 .align 16
43 OPENSSL_rdtsc:
44 rdtsc
45 shl \$32,%rdx
46 or %rdx,%rax
47 ret
48 .size OPENSSL_rdtsc,.-OPENSSL_rdtsc
49
50 .globl OPENSSL_ia32_cpuid
51 .type OPENSSL_ia32_cpuid,\@abi-omnipotent
52 .align 16
53 OPENSSL_ia32_cpuid:
54 mov %rbx,%r8 # save %rbx
55
56 xor %eax,%eax
57 cpuid
58 mov %eax,%r11d # max value for standard query level
59
60 xor %eax,%eax
61 cmp \$0x756e6547,%ebx # "Genu"
62 setne %al
63 mov %eax,%r9d
64 cmp \$0x49656e69,%edx # "ineI"
65 setne %al
66 or %eax,%r9d
67 cmp \$0x6c65746e,%ecx # "ntel"
68 setne %al
69 or %eax,%r9d # 0 indicates Intel CPU
70 jz .Lintel
71
72 cmp \$0x68747541,%ebx # "Auth"
73 setne %al
74 mov %eax,%r10d
75 cmp \$0x69746E65,%edx # "enti"
76 setne %al
77 or %eax,%r10d
78 cmp \$0x444D4163,%ecx # "cAMD"
79 setne %al
80 or %eax,%r10d # 0 indicates AMD CPU
81 jnz .Lintel
82
83 # AMD specific
84 mov \$0x80000000,%eax
85 cpuid
86 cmp \$0x80000001,%eax
87 jb .Lintel
88 mov %eax,%r10d
89 mov \$0x80000001,%eax
90 cpuid
91 or %ecx,%r9d
92 and \$0x00000801,%r9d # isolate AMD XOP bit, 1<<11
93
94 cmp \$0x80000008,%r10d
95 jb .Lintel
96
97 mov \$0x80000008,%eax
98 cpuid
99 movzb %cl,%r10 # number of cores - 1
100 inc %r10 # number of cores
101
102 mov \$1,%eax
103 cpuid
104 bt \$28,%edx # test hyper-threading bit
105 jnc .Lgeneric
106 shr \$16,%ebx # number of logical processors
107 cmp %r10b,%bl
108 ja .Lgeneric
109 and \$0xefffffff,%edx # ~(1<<28)
110 jmp .Lgeneric
111
112 .Lintel:
113 cmp \$4,%r11d
114 mov \$-1,%r10d
115 jb .Lnocacheinfo
116
117 mov \$4,%eax
118 mov \$0,%ecx # query L1D
119 cpuid
120 mov %eax,%r10d
121 shr \$14,%r10d
122 and \$0xfff,%r10d # number of cores -1 per L1D
123
124 .Lnocacheinfo:
125 mov \$1,%eax
126 cpuid
127 cmp \$0,%r9d
128 jne .Lnotintel
129 or \$0x00100000,%edx # use reserved 20th bit to engage RC4_CHAR
130 and \$15,%ah
131 cmp \$15,%ah # examine Family ID
132 je .Lnotintel
133 or \$0x40000000,%edx # use reserved bit to skip unrolled loop
134 .Lnotintel:
135 bt \$28,%edx # test hyper-threading bit
136 jnc .Lgeneric
137 and \$0xefffffff,%edx # ~(1<<28)
138 cmp \$0,%r10d
139 je .Lgeneric
140
141 or \$0x10000000,%edx # 1<<28
142 shr \$16,%ebx
143 cmp \$1,%bl # see if cache is shared
144 ja .Lgeneric
145 and \$0xefffffff,%edx # ~(1<<28)
146 .Lgeneric:
147 and \$0x00000800,%r9d # isolate AMD XOP flag
148 and \$0xfffff7ff,%ecx
149 or %r9d,%ecx # merge AMD XOP flag
150
151 shl \$32,%rcx
152 mov %edx,%ebx
153 or %rcx,%rbx # compose capability vector in %rbx
154 bt \$27+32,%rcx # check OSXSAVE bit
155 jnc .Lclear_avx
156 xor %ecx,%ecx # XCR0
157 .byte 0x0f,0x01,0xd0 # xgetbv
158 and \$6,%eax # isolate XMM and YMM state support
159 cmp \$6,%eax
160 je .Ldone
161 .Lclear_avx:
162 mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11)
163 shl \$32,%rax
164 and %rax,%rbx # clear AVX, FMA and AMD XOP bits
165 .Ldone:
166 mov %rbx,%rax
167 mov %r8,%rbx # restore %rbx
168 ret
169 .size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
170
171 .globl OPENSSL_cleanse
172 .type OPENSSL_cleanse,\@abi-omnipotent
173 .align 16
174 OPENSSL_cleanse:
175 xor %rax,%rax
176 cmp \$15,$arg2
177 jae .Lot
178 cmp \$0,$arg2
179 je .Lret
180 .Little:
181 mov %al,($arg1)
182 sub \$1,$arg2
183 lea 1($arg1),$arg1
184 jnz .Little
185 .Lret:
186 ret
187 .align 16
188 .Lot:
189 test \$7,$arg1
190 jz .Laligned
191 mov %al,($arg1)
192 lea -1($arg2),$arg2
193 lea 1($arg1),$arg1
194 jmp .Lot
195 .Laligned:
196 mov %rax,($arg1)
197 lea -8($arg2),$arg2
198 test \$-8,$arg2
199 lea 8($arg1),$arg1
200 jnz .Laligned
201 cmp \$0,$arg2
202 jne .Little
203 ret
204 .size OPENSSL_cleanse,.-OPENSSL_cleanse
205 ___
206
# OPENSSL_wipe_cpu, emitted only when $win64 is false.
# Zeroes %xmm0-%xmm15 and %rcx, %rdx, %rsi, %rdi, %r8-%r11, then returns the
# address %rsp+8 in %rax.
# NOTE(review): %rsp+8 is presumably the caller's stack pointer as it was
# before the call pushed the return address - confirm against the callers.
207 print<<___ if (!$win64);
208 .globl OPENSSL_wipe_cpu
209 .type OPENSSL_wipe_cpu,\@abi-omnipotent
210 .align 16
211 OPENSSL_wipe_cpu:
212 pxor %xmm0,%xmm0
213 pxor %xmm1,%xmm1
214 pxor %xmm2,%xmm2
215 pxor %xmm3,%xmm3
216 pxor %xmm4,%xmm4
217 pxor %xmm5,%xmm5
218 pxor %xmm6,%xmm6
219 pxor %xmm7,%xmm7
220 pxor %xmm8,%xmm8
221 pxor %xmm9,%xmm9
222 pxor %xmm10,%xmm10
223 pxor %xmm11,%xmm11
224 pxor %xmm12,%xmm12
225 pxor %xmm13,%xmm13
226 pxor %xmm14,%xmm14
227 pxor %xmm15,%xmm15
228 xorq %rcx,%rcx
229 xorq %rdx,%rdx
230 xorq %rsi,%rsi
231 xorq %rdi,%rdi
232 xorq %r8,%r8
233 xorq %r9,%r9
234 xorq %r10,%r10
235 xorq %r11,%r11
236 leaq 8(%rsp),%rax
237 ret
238 .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
239 ___
# OPENSSL_wipe_cpu, emitted only when $win64 is true.
# Zeroes %xmm0-%xmm5 and %rcx, %rdx, %r8-%r11, then returns the address
# %rsp+8 in %rax.
# NOTE(review): the narrower register set presumably reflects which registers
# are caller-saved under the Win64 convention - confirm against the ABI.
240 print<<___ if ($win64);
241 .globl OPENSSL_wipe_cpu
242 .type OPENSSL_wipe_cpu,\@abi-omnipotent
243 .align 16
244 OPENSSL_wipe_cpu:
245 pxor %xmm0,%xmm0
246 pxor %xmm1,%xmm1
247 pxor %xmm2,%xmm2
248 pxor %xmm3,%xmm3
249 pxor %xmm4,%xmm4
250 pxor %xmm5,%xmm5
251 xorq %rcx,%rcx
252 xorq %rdx,%rdx
253 xorq %r8,%r8
254 xorq %r9,%r9
255 xorq %r10,%r10
256 xorq %r11,%r11
257 leaq 8(%rsp),%rax
258 ret
259 .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
260 ___
# OPENSSL_instrument_bus and OPENSSL_instrument_bus2.
#
# Both routines repeatedly read the time-stamp counter, flush the cache line
# at ($out) with clflush and add the tick difference to the 32-bit word at
# ($out) with a lock-prefixed add.
#
#   OPENSSL_instrument_bus($arg1 = output pointer, $arg2 = count)
#       Runs exactly "count" iterations, advancing the output pointer by
#       4 bytes each time; returns the count.
#   OPENSSL_instrument_bus2($arg1 = output pointer, $arg2 = count,
#                           $arg3 = max)
#       Runs at most "max" iterations.  The output pointer advances and the
#       count drops only when the current difference differs from the
#       previous one.  Returns the initial count minus what is left of it.
{
my $out="%r10";		# output pointer
my $cnt="%rcx";		# remaining count
my $max="%r11";		# iteration limit (bus2) / saved count (bus)
my $lasttick="%r8d";	# previous low 32 bits of the time-stamp counter
my $lastdiff="%r9d";	# previous tick difference
# Stack slot, relative to %rsp at entry, where bus2 parks the initial count:
# 8(%rsp) in the Win64 case, -8(%rsp) otherwise.  This must test the $win64
# variable; the bareword "win64" is always true, which made Unix builds
# write to 8(%rsp), i.e. above the return address in the caller's frame.
my $redzone=$win64?8:-8;

print<<___;
.globl	OPENSSL_instrument_bus
.type	OPENSSL_instrument_bus,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus:
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg2,$max

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0
	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)
	jmp	.Loop
.align	16
.Loop:	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax
	mov	%edx,$lasttick
	mov	%eax,$lastdiff
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)
	lea	4($out),$out
	sub	\$1,$cnt
	jnz	.Loop

	mov	$max,%rax
	ret
.size	OPENSSL_instrument_bus,.-OPENSSL_instrument_bus

.globl	OPENSSL_instrument_bus2
.type	OPENSSL_instrument_bus2,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus2:
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg3,$max
	mov	$cnt,$redzone(%rsp)

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0

	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)

	rdtsc			# collect 1st diff
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	mov	%eax,$lastdiff	# lastdiff = diff
.Loop2:
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)	# accumulate diff

	sub	\$1,$max
	jz	.Ldone2

	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	cmp	$lastdiff,%eax
	mov	%eax,$lastdiff	# lastdiff = diff
	mov	\$0,%edx
	setne	%dl
	sub	%rdx,$cnt	# conditional --$cnt
	lea	($out,%rdx,4),$out	# conditional ++$out
	jnz	.Loop2

.Ldone2:
	mov	$redzone(%rsp),%rax
	sub	$cnt,%rax
	ret
.size	OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
___
}
351
# Closing the pipe flushes buffered output and waits for the translator.
# A false return means either the write failed ($! set) or the child exited
# with a non-zero status ($? set); report it so the build does not carry on
# with a truncated or missing assembly file.
close STDOUT
    or die "error closing pipe to x86_64-xlate.pl: "
         . ($! ? "$!" : "exit status $?");