]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/sparccpuid.S
Changed OPENSSL_gmtime so macOS uses threadsafe gmtime_r instead of gmtime.
[thirdparty/openssl.git] / crypto / sparccpuid.S
/*
 * Build-time ABI selection.
 *
 * ABI64 is defined when the compiler reports a 64-bit target: Sun C with
 * __sparcv9 (-xarch=v9 on the command line) or GNU C with __arch64__
 * (-m64 on the command line).
 */
#if (defined(__SUNPRO_C) && defined(__sparcv9)) || \
    (defined(__GNUC__) && defined(__arch64__))
# define ABI64
#endif

/*
 * FRAME is the stack adjustment handed to "save"; BIAS is the offset that
 * gets added to %sp/%fp wherever this file forms a stack address.
 */
#ifndef ABI64
# define FRAME	-96
# define BIAS	0
#else
.register	%g2,#scratch
.register	%g3,#scratch
# define FRAME	-192
# define BIAS	2047
#endif
16
.text
.align	32
.global	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,#function
! OPENSSL_wipe_cpu: scrub the integer and floating-point registers.
!
! Only registers are cleared here.  The backing store of the register
! windows [which lives on the stack, toward lower addresses] is left
! alone, so calling this routine does not replace wiping the stack.
! To make that stack wipe easier, the value returned is a pointer to
! the top of stack of the *caller*.
OPENSSL_wipe_cpu:
	save	%sp,FRAME,%sp
	nop
#ifdef __sun
#include <sys/trap.h>
	ta	ST_CLEAN_WINDOWS	! software trap ST_CLEAN_WINDOWS
#else
	call	.walk.reg.wins		! clear the windows by hand
#endif
	nop
	call	.PIC.zero.up		! %o0 = run-time address of .zero
	mov	.zero-(.-4),%o0		! delay slot: offset of .zero from the call
	ld	[%o0],%f0		! %f0 = 0
	ld	[%o0],%f1		! %f1 = 0

	subcc	%g0,1,%o0
	! The word below encodes the V9 instruction "rd %ccr,%o0".  The V8
	! specification states that the same encoding ["rd %asr2,%o0" in
	! V8 terms] does not raise an illegal_instruction trap, so it is
	! safe to execute on either CPU generation.  A V9 CPU returns the
	! distinct value 0x99, i.e. "negative" and "borrow" set in both
	! %icc and %xcc, which tells V9 apart from V8.
	.word	0x91408000	!rd	%ccr,%o0
	cmp	%o0,0x99
	bne	.Lv8_fp			! not V9: skip the upper FP bank
	nop
	! The upper FP register bank is not used by this code, but it is
	! wiped anyway because memcpy might have used it...
	.word	0xbfa00040	!fmovd	%f0,%f62
	.word	0xbba00040	!...
	.word	0xb7a00040
	.word	0xb3a00040
	.word	0xafa00040
	.word	0xaba00040
	.word	0xa7a00040
	.word	0xa3a00040
	.word	0x9fa00040
	.word	0x9ba00040
	.word	0x97a00040
	.word	0x93a00040
	.word	0x8fa00040
	.word	0x8ba00040
	.word	0x87a00040
	.word	0x83a00040	!fmovd	%f0,%f32
	! Common tail: zero %f2-%f31 from %f0/%f1, interleaved with
	! clearing the integer registers of the current window and %g1-%g5.
.Lv8_fp:
	fmovs	%f1,%f31
	clr	%o0
	fmovs	%f0,%f30
	clr	%o1
	fmovs	%f1,%f29
	clr	%o2
	fmovs	%f0,%f28
	clr	%o3
	fmovs	%f1,%f27
	clr	%o4
	fmovs	%f0,%f26
	clr	%o5
	fmovs	%f1,%f25
	clr	%o7
	fmovs	%f0,%f24
	clr	%l0
	fmovs	%f1,%f23
	clr	%l1
	fmovs	%f0,%f22
	clr	%l2
	fmovs	%f1,%f21
	clr	%l3
	fmovs	%f0,%f20
	clr	%l4
	fmovs	%f1,%f19
	clr	%l5
	fmovs	%f0,%f18
	clr	%l6
	fmovs	%f1,%f17
	clr	%l7
	fmovs	%f0,%f16
	clr	%i0
	fmovs	%f1,%f15
	clr	%i1
	fmovs	%f0,%f14
	clr	%i2
	fmovs	%f1,%f13
	clr	%i3
	fmovs	%f0,%f12
	clr	%i4
	fmovs	%f1,%f11
	clr	%i5
	fmovs	%f0,%f10
	clr	%g1
	fmovs	%f1,%f9
	clr	%g2
	fmovs	%f0,%f8
	clr	%g3
	fmovs	%f1,%f7
	clr	%g4
	fmovs	%f0,%f6
	clr	%g5
	fmovs	%f1,%f5
	fmovs	%f0,%f4
	fmovs	%f1,%f3
	fmovs	%f0,%f2

	add	%fp,BIAS,%i0	! return pointer to top of stack of the caller

	ret
	restore

! Two zero words; loaded into %f0 and %f1 above.
.zero:	.long	0x0,0x0
! Helper: turn the offset passed in %o0 into an address by adding %o7,
! the address of the call instruction that got us here.
.PIC.zero.up:
	retl
	add	%o0,%o7,%o0
#ifdef DEBUG
.global	walk_reg_wins
.type	walk_reg_wins,#function
walk_reg_wins:
#endif
! Recursive helper: opens a new register window, recurses while the
! checks below allow it, and clears the out and local registers of
! each window on the way back.
.walk.reg.wins:
	save	%sp,FRAME,%sp
	cmp	%i7,%o7
	be	.Lwalk_ret		! %i7 == %o7: return right away
	clr	%o0			! delay slot
	cmp	%o7,0			! the compiler never cleans %o7...
	be	.Lwalk_clear		! could have been a leaf function...
	clr	%o1			! delay slot
	call	.walk.reg.wins		! go one window deeper
	nop
.Lwalk_clear:
	clr	%o2
	clr	%o3
	clr	%o4
	clr	%o5
	clr	%o7
	clr	%l0
	clr	%l1
	clr	%l2
	clr	%l3
	clr	%l4
	clr	%l5
	clr	%l6
	clr	%l7
	add	%o0,1,%i0		! result is only used for debugging
.Lwalk_ret:
	ret
	restore
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
167
.global	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,#function
.align	32
! OPENSSL_atomic_add: atomically add to a 32-bit word in memory.
!
! In:	%o0 = address of the word
!	%o1 = amount to add
! Out:	%o0 = the new value, sign-extended
!
! In a non-ABI64 build the CPU generation is probed at run time.  A V9
! CPU takes the cas loop at .v9.  Anything else takes a swap-based
! path that temporarily replaces the word with the marker -1 while the
! update is in flight, so on that path the value -1 is reserved.
OPENSSL_atomic_add:
#ifndef ABI64
	subcc	%g0,1,%o2
	! V9 "rd %ccr,%o2"; per the V8 specification the same encoding does
	! not trap on V8.  Only V9 yields 0x99 after the subcc above.
	.word	0x95408000	!rd	%ccr,%o2
	cmp	%o2,0x99
	be	.v9
	nop
	save	%sp,FRAME,%sp
	ba	.enter
	nop
#ifdef __sun
! Note that you do not have to link with libthread to call thr_yield,
! as libc provides a stub, which is overloaded the moment you link
! with *either* libpthread or libthread...
#define	YIELD_CPU	thr_yield
#else
! applies at least to Linux and FreeBSD... Feedback expected...
#define	YIELD_CPU	sched_yield
#endif
.spin:	call	YIELD_CPU		! word is busy: give up the CPU, retry
	nop
.enter:	ld	[%i0],%i2
	cmp	%i2,-1			! cheap pre-check for the busy marker;
					! must match the -1 swapped in below
					! [this used to compare against -4096,
					! which never matched the marker and
					! hung on a real value of -4096]
	be	.spin
	mov	-1,%i2			! delay slot: busy marker
	swap	[%i0],%i2		! grab the word, leave the marker
	cmp	%i2,-1
	be	.spin			! somebody else holds it
	add	%i2,%i1,%i2		! delay slot: new value
	stbar
	st	%i2,[%i0]		! publish new value, releasing the word
	sra	%i2,%g0,%i0		! return value is a signed int
	ret
	restore
.v9:
#endif
	ld	[%o0],%o2
1:	add	%o1,%o2,%o3
	.word	0xd7e2100a	!cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3
	cmp	%o2,%o3
	bne	1b			! word changed under us: retry
	mov	%o3,%o2			! cas is always fetching to dest. register
	add	%o1,%o2,%o0		! OpenSSL expects the new value
	retl
	sra	%o0,%g0,%o0		! return value is a signed int
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add
217
.global	_sparcv9_rdtick
.type	_sparcv9_rdtick,#function
.align	32
! _sparcv9_rdtick: read the %tick register when running on a V9 CPU.
!
! Out:	%o0 = %tick, %o1 = %tick shifted right by 32 bits	[V9]
!	%o0 = 0,     %o1 = 0					[otherwise]
_sparcv9_rdtick:
	subcc	%g0,1,%o0
	.word	0x91408000	!rd	%ccr,%o0
	cmp	%o0,0x99		! 0x99 is only returned by V9
	bne	.Lrdtick_none
	xor	%o0,%o0,%o0		! delay slot: %o0 = 0 on both paths
	.word	0x91410000	!rd	%tick,%o0
	retl
	.word	0x93323020	!srlx	%o0,32,%o1
.Lrdtick_none:
	retl
	xor	%o1,%o1,%o1		! delay slot: %o1 = 0
.size	_sparcv9_rdtick,.-_sparcv9_rdtick
234
.global	_sparcv9_vis1_probe
.type	_sparcv9_vis1_probe,#function
.align	8
! _sparcv9_vis1_probe: execute two VIS1 instructions and return.
! A load through ASI_FP16_P from the stack [%sp+BIAS+2] into %f0 is
! followed by an fxor that zeroes %f0.
! NOTE(review): presumably the caller detects missing VIS1 support by
! catching the resulting trap; confirm against the caller.
_sparcv9_vis1_probe:
	add	%sp,BIAS+2,%o1
	.word	0xc19a5a40	!ldda	[%o1]ASI_FP16_P,%f0
	retl
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
.size	_sparcv9_vis1_probe,.-_sparcv9_vis1_probe
244
! _sparcv9_vis1_instrument: probe and time VIS1 instructions.
!
! The result is the number of cycles needed to execute rdtick plus a
! pair of VIS1 instructions.  The VIS unit of US-Tx is slow [documented
! as 6 cycles on T2] and that core is in-order single-issue, so it
! should be possible to tell Tx apart reliably...  Return values seen
! so far:
!
!	UltraSPARC IIe		7
!	UltraSPARC III		7
!	UltraSPARC T1		24
!	SPARC T4		65(*)
!
! (*)	this figure has less to do with VIS instruction latencies;
!	rdtick itself appears to be that slow.  It still does the trick
!	in the sense that the FP and VIS code paths remain slower than
!	the integer-only ones.
!
! Numbers for T2 and SPARC64 V-VII are more than welcome.
!
! US-T1 could be detected specifically by timing fmul8ulx16, which is
! emulated on T1 and therefore costs a lot of %tick-s, a couple of
! thousand on Linux...
.global	_sparcv9_vis1_instrument
.type	_sparcv9_vis1_instrument,#function
.align	8
_sparcv9_vis1_instrument:
	! warm-up pair, then five time stamps with a VIS1 pair in between
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x91410000	!rd	%tick,%o0
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x93410000	!rd	%tick,%o1
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x95410000	!rd	%tick,%o2
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x97410000	!rd	%tick,%o3
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x99410000	!rd	%tick,%o4

	! turn the five time stamps into four intervals, %o0..%o3
	sub	%o1,%o0,%o0
	sub	%o2,%o1,%o1
	sub	%o3,%o2,%o2
	sub	%o4,%o3,%o3

	! %o0 = minimum of the four intervals; each annulled branch
	! executes its delay-slot mov only when %o0 is the larger value
	cmp	%o0,%o1
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o1,%o0
	cmp	%o0,%o2
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o2,%o0
	cmp	%o0,%o3
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o3,%o0

	retl
	nop
.size	_sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument
305
.global	_sparcv9_vis2_probe
.type	_sparcv9_vis2_probe,#function
.align	8
! _sparcv9_vis2_probe: execute a single bshuffle in the delay slot of
! the return.
! NOTE(review): presumably a trap on this instruction is how the caller
! learns that VIS2 is absent; confirm against the caller.
_sparcv9_vis2_probe:
	retl
	.word	0x81b00980	!bshuffle	%f0,%f0,%f0
.size	_sparcv9_vis2_probe,.-_sparcv9_vis2_probe
313
.global	_sparcv9_fmadd_probe
.type	_sparcv9_fmadd_probe,#function
.align	8
! _sparcv9_fmadd_probe: zero %f0 and %f2 with fxor, then execute one
! fmaddd in the delay slot of the return.
! NOTE(review): presumably a trap on fmaddd is how the caller learns
! that fused multiply-add is absent; confirm against the caller.
_sparcv9_fmadd_probe:
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	retl
	.word	0x81b80440	!fmaddd	%f0,%f0,%f2,%f0
.size	_sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe
323
.global	_sparcv9_rdcfr
.type	_sparcv9_rdcfr,#function
.align	8
! _sparcv9_rdcfr: return the contents of %asr26 in %o0.
_sparcv9_rdcfr:
	retl
	.word	0x91468000	!rd	%asr26,%o0
.size	_sparcv9_rdcfr,.-_sparcv9_rdcfr
331
.global	_sparcv9_vis3_probe
.type	_sparcv9_vis3_probe,#function
.align	8
! _sparcv9_vis3_probe: execute a single xmulx, with %g0 for all three
! operands, in the delay slot of the return.
! NOTE(review): presumably a trap on this instruction is how the caller
! learns that VIS3 is absent; confirm against the caller.
_sparcv9_vis3_probe:
	retl
	.word	0x81b022a0	!xmulx	%g0,%g0,%g0
.size	_sparcv9_vis3_probe,.-_sparcv9_vis3_probe
339
.global	_sparcv9_random
.type	_sparcv9_random,#function
.align	8
! _sparcv9_random: execute the "random" instruction with %o0 as its
! destination, in the delay slot of the return; the result is whatever
! that instruction leaves in %o0.
_sparcv9_random:
	retl
	.word	0x91b002a0	!random	%o0
! The size must be measured from the start of this function.  It used
! to be measured from _sparcv9_vis3_probe, which made the recorded
! symbol size too large and overlapping the previous function.
.size	_sparcv9_random,.-_sparcv9_random
347
.global	OPENSSL_cleanse
.type	OPENSSL_cleanse,#function
.align	32
! OPENSSL_cleanse: overwrite a memory area with zero bytes.
!
! In:	%o0 = start address
!	%o1 = length in bytes
!
! Lengths up to 14 are handled by the byte loop at .Little.  Longer
! areas go to .Lot, which stores bytes until the pointer is aligned,
! then stores whole words [8 bytes at a time on V9, 4 on V8], and
! finally hands any remainder to .Little.
OPENSSL_cleanse:
	cmp	%o1,14
	nop
#ifdef ABI64
	bgu	%xcc,.Lot
#else
	bgu	.Lot
#endif
	cmp	%o1,0			! delay slot
	bne	.Little
	nop
	retl				! length 0: nothing to do
	nop

	! byte-at-a-time loop; entered only with %o1 != 0
.Little:
	stb	%g0,[%o0]
	subcc	%o1,1,%o1
	bnz	.Little
	add	%o0,1,%o0		! delay slot
	retl
	nop
.align	32
.Lot:
#ifndef ABI64
	subcc	%g0,1,%g1
	! V9 "rd %ccr,%g1"; per the V8 specification the same encoding does
	! not trap on V8.  Only V9 yields 0x99 after the subcc above.
	.word	0x83408000	!rd	%ccr,%g1
	cmp	%g1,0x99
	bne	.v8lot
	nop
#endif

	! V9: store single bytes until %o0 is 8-byte aligned
.v9lot:	andcc	%o0,7,%g0
	bz	.v9aligned
	nop
	stb	%g0,[%o0]
	sub	%o1,1,%o1
	ba	.v9lot
	add	%o0,1,%o0		! delay slot
.align	16,0x01000000
	! V9: 8 bytes per iteration while at least 8 bytes remain.  The
	! branch is hand-encoded with a fixed displacement back to
	! .v9aligned, so no instruction may be added or removed here.
.v9aligned:
	.word	0xc0720000	!stx	%g0,[%o0]
	sub	%o1,8,%o1
	andcc	%o1,-8,%g0
#ifdef ABI64
	.word	0x126ffffd	!bnz	%xcc,.v9aligned
#else
	.word	0x124ffffd	!bnz	%icc,.v9aligned
#endif
	add	%o0,8,%o0		! delay slot

	cmp	%o1,0
	bne	.Little			! finish the tail byte by byte
	nop
	retl
	nop
#ifndef ABI64
	! V8: store single bytes until %o0 is 4-byte aligned
.v8lot:	andcc	%o0,3,%g0
	bz	.v8aligned
	nop
	stb	%g0,[%o0]
	sub	%o1,1,%o1
	ba	.v8lot
	add	%o0,1,%o0		! delay slot
	nop
	! V8: 4 bytes per iteration while at least 4 bytes remain
.v8aligned:
	st	%g0,[%o0]
	sub	%o1,4,%o1
	andcc	%o1,-4,%g0
	bnz	.v8aligned
	add	%o0,4,%o0		! delay slot

	cmp	%o1,0
	bne	.Little			! finish the tail byte by byte
	nop
	retl
	nop
#endif
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
430
.global	_sparcv9_vis1_instrument_bus
.weak	_sparcv9_vis1_instrument_bus
.type	_sparcv9_vis1_instrument_bus,#function
.align	8
! _sparcv9_vis1_instrument_bus: fill an array with %tick deltas.
!
! In:	%o0 = out, address of the first 32-bit slot
!	%o1 = cnt, number of slots to process [the loop tests it only
!	      after the first pass]
! Out:	%o0 = cnt
!
! Each pass block-loads and block-stores the 64-byte line holding the
! current slot, then adds the latest delta to the slot with cas.
_sparcv9_vis1_instrument_bus:
	mov	%o1,%o3			! save cnt
	.word	0x99410000	!rd	%tick,%o4	! tick
	mov	%o4,%o5			! lasttick = tick
	set	0,%g4			! diff

	! one pass with diff = 0 before the timed loop
	andn	%o0,63,%g1		! %g1 = out rounded down to 64 bytes
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4

.Lbus_sample:
	.word	0x99410000	!rd	%tick,%o4
	sub	%o4,%o5,%g4		! diff=tick-lasttick
	mov	%o4,%o5			! lasttick=tick

	andn	%o0,63,%g1
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4		! slot value + diff
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4
	subcc	%o1,1,%o1		! --$cnt
	bnz	.Lbus_sample
	add	%o0,4,%o0		! delay slot: ++$out

	retl
	mov	%o3,%o0			! delay slot: return the saved cnt
.size	_sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus
469
.global	_sparcv9_vis1_instrument_bus2
.weak	_sparcv9_vis1_instrument_bus2
.type	_sparcv9_vis1_instrument_bus2,#function
.align	8
! _sparcv9_vis1_instrument_bus2: like _sparcv9_vis1_instrument_bus,
! but the output pointer only advances when the new %tick delta
! differs from the previous one, and the number of passes is capped.
!
! In:	%o0 = out, address of the first 32-bit slot
!	%o1 = cnt, number of slots
!	%o2 = max, pass limit
! Out:	%o0 = cnt minus the number of slots still unconsumed
_sparcv9_vis1_instrument_bus2:
	mov	%o1,%o3			! save cnt
	sll	%o1,2,%o1		! cnt*=4, i.e. cnt in bytes

	.word	0x99410000	!rd	%tick,%o4	! tick
	mov	%o4,%o5			! lasttick = tick
	set	0,%g4			! diff

	! one pass with diff = 0 before the main loop
	andn	%o0,63,%g1		! %g1 = out rounded down to 64 bytes
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4

	.word	0x99410000	!rd	%tick,%o4	! tick
	sub	%o4,%o5,%g4		! diff=tick-lasttick
	mov	%o4,%o5			! lasttick=tick
	mov	%g4,%g5			! lastdiff=diff
.Lbus2_sample:
	andn	%o0,63,%g1
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4		! slot value + diff
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4

	subcc	%o2,1,%o2		! --max
	bz	.Lbus2_finish		! pass limit reached
	nop

	.word	0x99410000	!rd	%tick,%o4	! tick
	sub	%o4,%o5,%g4		! diff=tick-lasttick
	mov	%o4,%o5			! lasttick=tick
	cmp	%g4,%g5			! zero flag set when diff==lastdiff
	mov	%g4,%g5			! lastdiff=diff

	! branch-free step: %g1 = 4 when diff changed, 0 when it did not
	.word	0x83408000	!rd	%ccr,%g1
	and	%g1,4,%g1		! isolate zero flag
	xor	%g1,4,%g1		! flip zero flag

	subcc	%o1,%g1,%o1		! conditional --$cnt
	bnz	.Lbus2_sample
	add	%o0,%g1,%o0		! delay slot: conditional ++$out

.Lbus2_finish:
	srl	%o1,2,%o1		! remaining bytes back to slots
	retl
	sub	%o3,%o1,%o0		! delay slot: cnt - remaining
.size	_sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2
528
! Fragment placed in the ".init" section: a call to OPENSSL_cpuid_setup
! followed by the nop that fills its delay slot.
.section	".init",#alloc,#execinstr
	call	OPENSSL_cpuid_setup
	nop