]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/x86_64cpuid.pl
bn/bn_lib.c: add BN_FLG_FIXED_TOP flag.
[thirdparty/openssl.git] / crypto / x86_64cpuid.pl
CommitLineData
14e21f86
AP
#!/usr/bin/env perl

# Command line: [flavour] output.  When the first argument contains a dot
# it is taken to be the output file name and the flavour is left undefined.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# The Win64 argument registers are selected by a nasm/masm/mingw64 flavour
# or by an output file name ending in .asm.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for x86_64-xlate.pl next to this script, then in its perlasm/
# subdirectory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed below is piped through the translator.  The script
# path and the output name are quoted so that paths containing spaces
# survive the shell; the output argument is only appended when one was
# given, so the translator sees the same argument list as before.
my $cmd = "\"$^X\" \"$xlate\" $flavour";
$cmd .= " \"$output\"" if (defined($output) && $output ne "");
open OUT,"| $cmd" or die "can't call $xlate: $!";
*STDOUT=*OUT;

# First four integer argument registers for the selected calling convention.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order
376729e1 19
aa8f38e4
AP
20print<<___;
# Place a call to OPENSSL_cpuid_setup in the .init section.  The symbol is
# external to this file and marked hidden.
.extern		OPENSSL_cpuid_setup
.hidden		OPENSSL_cpuid_setup
.section	.init
	call	OPENSSL_cpuid_setup

# Capability vector as a hidden common symbol: 16 bytes, alignment 4.
.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P,16,4

.text
376729e1
AP
30
# OPENSSL_atomic_add
#   In:    first argument  = address of a 32-bit integer
#          second argument = amount to add
#   Out:   %rax = the updated 32-bit value, sign-extended to 64 bits
#   Uses:  %rax, %r8, flags
# Implemented as a compare-and-exchange retry loop.
.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
	movl	($arg1),%eax		# current value
.Lspin:	leaq	($arg2,%rax),%r8	# candidate = current + amount
	.byte	0xf0			# lock
	cmpxchgl	%r8d,($arg1)	# store candidate if memory still equals %eax
	jne	.Lspin			# lost the race: %eax was reloaded, retry
	movl	%r8d,%eax		# return the value that was stored
	.byte	0x48,0x98		# cltq/cdqe
	ret
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add
44
932cc129
AP
# OPENSSL_rdtsc
#   Out:   %rax = 64-bit time-stamp counter
#   Uses:  %rax, %rdx, flags
.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,\@abi-omnipotent
.align	16
OPENSSL_rdtsc:
	rdtsc				# counter returned in %edx:%eax
	shl	\$32,%rdx		# move the high half into bits 63..32
	or	%rdx,%rax		# combine both halves
	ret
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc
54
# OPENSSL_ia32_cpuid
#   In:    one argument (declared as a 1-argument function); the body
#          addresses it as a pointer in %rdi and writes the 32-bit word
#          at offset 8 from it (extended feature flags).
#   Out:   %rax = adjusted CPUID.1 %ecx in bits 63..32,
#                 adjusted CPUID.1 %edx in bits 31..0
#   Register roles inside the function:
#          %r8   saved copy of %rbx (cpuid overwrites %ebx)
#          %r9d  vendor check result (0 indicates Intel), later the
#                copy of %ecx merged with the AMD XOP flag
#          %r10d AMD vendor check / core count scratch, later copy of %edx
#          %r11d max value for standard query level
.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@function,1
.align	16
OPENSSL_ia32_cpuid:
	mov	%rbx,%r8		# save %rbx

	xor	%eax,%eax
	mov	%eax,8(%rdi)		# clear extended feature flags
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	# Compare the vendor string against "GenuineIntel".
	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	# Compare the vendor string against "AuthenticAMD".
	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel			# neither vendor: share the Intel path

	# AMD specific
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d		# max extended query level
	mov	\$0x80000001,%eax
	cpuid
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# isolate AMD XOP bit, 1<<11

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric		# more logical processors than cores
	and	\$0xefffffff,%edx	# ~(1<<28)
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d		# mov leaves the flags from cmp intact
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	and	\$0xbfefffff,%edx	# force reserved bits to 0
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	jne	.LnotP4
	or	\$0x00100000,%edx	# set reserved bit#20 to engage RC4_CHAR
.LnotP4:
	cmp	\$6,%ah
	jne	.Lnotintel
	and	\$0x0fff0ff0,%eax
	cmp	\$0x00050670,%eax	# Knights Landing
	je	.Lknights
	cmp	\$0x00080650,%eax	# Knights Mill (according to sde)
	jne	.Lnotintel
.Lknights:
	and	\$0xfbffffff,%ecx	# clear XSAVE flag to mimic Silvermont

.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	cmp	\$0,%r10d
	je	.Lgeneric		# cores - 1 is zero: leave the bit cleared

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx

	cmp	\$7,%r11d
	jb	.Lno_extended_info
	mov	\$7,%eax
	xor	%ecx,%ecx
	cpuid
	bt	\$26,%r9d		# check XSAVE bit, cleared on Knights
	jc	.Lnotknights
	and	\$0xfff7ffff,%ebx	# clear ADCX/ADOX flag
.Lnotknights:
	mov	%ebx,8(%rdi)		# save extended feature flags
.Lno_extended_info:

	bt	\$27,%r9d		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
	andl	\$0xffffffdf,8(%rdi)	# clear AVX2, ~(1<<5)
.Ldone:
	shl	\$32,%r9		# %ecx copy goes to the upper half
	mov	%r10d,%eax		# %edx copy goes to the lower half
	mov	%r8,%rbx		# restore %rbx
	or	%r9,%rax
	ret
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
b2dba9bf
AP
201
# OPENSSL_cleanse
#   In:    first argument  = address of the buffer
#          second argument = length in bytes
#   Effect: stores zero to every byte of the buffer
#   Uses:  %rax, both argument registers, flags
# Lengths below 15 are cleared one byte at a time.  Longer buffers are
# cleared bytewise up to an 8-byte boundary, then 8 bytes per store, and
# any remaining tail goes back through the bytewise loop.
.globl	OPENSSL_cleanse
.type	OPENSSL_cleanse,\@abi-omnipotent
.align	16
OPENSSL_cleanse:
	xor	%rax,%rax		# the zero that gets stored
	cmp	\$15,$arg2
	jae	.Lot
	cmp	\$0,$arg2
	je	.Lret			# nothing to do for an empty buffer
.Little:
	mov	%al,($arg1)
	sub	\$1,$arg2
	lea	1($arg1),$arg1		# lea keeps the flags from sub
	jnz	.Little
.Lret:
	ret
.align	16
.Lot:
	test	\$7,$arg1		# is the pointer 8-byte aligned yet?
	jz	.Laligned
	mov	%al,($arg1)
	lea	-1($arg2),$arg2
	lea	1($arg1),$arg1
	jmp	.Lot
.Laligned:
	mov	%rax,($arg1)
	lea	-8($arg2),$arg2
	test	\$-8,$arg2		# at least 8 bytes left?
	lea	8($arg1),$arg1		# lea keeps the flags from test
	jnz	.Laligned
	cmp	\$0,$arg2
	jne	.Little			# clear the remaining 1..7 bytes
	ret
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
14e21f86 236___
aa8f38e4
AP
237
# Variant emitted when the Win64 convention is not selected.
print<<___ if (!$win64);
# OPENSSL_wipe_cpu
#   Zeroes %xmm0-%xmm15 and the integer registers %rcx, %rdx, %rsi, %rdi
#   and %r8-%r11.
#   Out:   %rax = %rsp + 8 as seen on entry, i.e. the address just above
#          the return address
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	pxor	%xmm10,%xmm10
	pxor	%xmm11,%xmm11
	pxor	%xmm12,%xmm12
	pxor	%xmm13,%xmm13
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%rsi,%rsi
	xorq	%rdi,%rdi
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
# Variant emitted when the Win64 convention is selected.
print<<___ if ($win64);
# OPENSSL_wipe_cpu
#   Zeroes %xmm0-%xmm5 and the integer registers %rcx, %rdx and %r8-%r11.
#   Unlike the other variant it leaves %xmm6-%xmm15, %rsi and %rdi alone;
#   those are callee-saved under the Microsoft x64 convention.
#   Out:   %rax = %rsp + 8 as seen on entry, i.e. the address just above
#          the return address
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
292
922ac25f
AP
293print<<___;
# OPENSSL_ia32_rdrand
#   Out:   %rax = value delivered by rdrand; when that value is zero the
#          remaining retry counter from %rcx is returned instead
#   Uses:  %rax, %rcx, flags
# Up to 8 attempts are made; rdrand sets the carry flag on success.
# NOTE(review): a zero return is presumably the failure indication; that
# relies on a failed rdrand leaving zero in its destination - confirm
# against the processor manuals.
.globl	OPENSSL_ia32_rdrand
.type	OPENSSL_ia32_rdrand,\@abi-omnipotent
.align	16
OPENSSL_ia32_rdrand:
	mov	\$8,%ecx		# retry budget
.Loop_rdrand:
	rdrand	%rax
	jc	.Lbreak_rdrand		# carry set: value is valid
	loop	.Loop_rdrand		# decrement %rcx, retry while non-zero
.Lbreak_rdrand:
	cmp	\$0,%rax
	cmove	%rcx,%rax		# zero result: hand back the counter
	ret
.size	OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand
104c032b
AP
308
# OPENSSL_ia32_rdseed
#   Out:   %rax = value delivered by rdseed; when that value is zero the
#          remaining retry counter from %rcx is returned instead
#   Uses:  %rax, %rcx, flags
# Up to 8 attempts are made; rdseed sets the carry flag on success.
# NOTE(review): a zero return is presumably the failure indication; that
# relies on a failed rdseed leaving zero in its destination - confirm
# against the processor manuals.
.globl	OPENSSL_ia32_rdseed
.type	OPENSSL_ia32_rdseed,\@abi-omnipotent
.align	16
OPENSSL_ia32_rdseed:
	mov	\$8,%ecx		# retry budget
.Loop_rdseed:
	rdseed	%rax
	jc	.Lbreak_rdseed		# carry set: value is valid
	loop	.Loop_rdseed		# decrement %rcx, retry while non-zero
.Lbreak_rdseed:
	cmp	\$0,%rax
	cmove	%rcx,%rax		# zero result: hand back the counter
	ret
.size	OPENSSL_ia32_rdseed,.-OPENSSL_ia32_rdseed
922ac25f
AP
323___
324
# STDOUT is the pipe into x86_64-xlate.pl.  Closing it flushes the output;
# a false return means a write failed or the translator exited with an
# error, so stop instead of leaving a truncated output file unnoticed.
close STDOUT or die "error closing STDOUT: $!";