]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/sparccpuid.S
Remove __cplusplus preamble from internal headers
[thirdparty/openssl.git] / crypto / sparccpuid.S
CommitLineData
44c8a5e2
RS
1! Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
2!
3! Licensed under the OpenSSL license (the "License"). You may not use
4! this file except in compliance with the License. You can obtain a copy
5! in the file LICENSE in the source distribution or at
6! https://www.openssl.org/source/license.html
7
cee73df3
AP
/*
 * ABI selection.  ABI64 is defined for 64-bit builds: Sun C targeting
 * sparcv9 (-xarch=v9) or GCC targeting a 64-bit SPARC (-m64).
 */
#if (defined(__SUNPRO_C) && defined(__sparcv9)) || \
    (defined(__GNUC__) && defined(__arch64__))
# define ABI64
#endif

/*
 * FRAME is the displacement handed to every "save" in this file and BIAS
 * is the offset added to %sp/%fp when a real stack address is needed.
 * The 64-bit build additionally declares %g2/%g3 as scratch registers.
 */
#ifdef ABI64
	.register	%g2,#scratch
	.register	%g3,#scratch
# define FRAME	-192
# define BIAS	2047
#else
# define FRAME	-96
# define BIAS	0
#endif
23
.text
.align	32
.global	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,#function
! OPENSSL_wipe_cpu
!
! Zeroes the integer registers (globals %g1-%g5, outs, locals, ins) and
! the floating-point registers, then returns %fp plus the stack bias.
!
! Keep in mind that this does not excuse us from wiping the stack!
! This routine wipes registers, but not the backing store [which
! resides on the stack, toward lower addresses]. To make stack wiping
! possible, the return value is a pointer to the top of stack of the
! *caller*.
OPENSSL_wipe_cpu:
	save	%sp,FRAME,%sp
	nop
#ifdef __sun
#include <sys/trap.h>
	ta	ST_CLEAN_WINDOWS
#else
	call	.walk.reg.wins
#endif
	nop
	! Position-independent address of .zero: the delay slot loads the
	! distance from the call instruction to .zero, and the helper adds
	! %o7 (the address of the call) to it.
	call	.PIC.zero.up
	mov	.zero-(.-4),%o0
	ld	[%o0],%f0		! %f0 = 0
	ld	[%o0],%f1		! %f1 = 0

	subcc	%g0,1,%o0
	! Following is V9 "rd %ccr,%o0" instruction. However! V8
	! specification says that it ("rd %asr2,%o0" in V8 terms) does
	! not cause illegal_instruction trap. It therefore can be used
	! to determine if the CPU the code is executing on is V8- or
	! V9-compliant, as V9 returns a distinct value of 0x99,
	! "negative" and "borrow" bits set in both %icc and %xcc.
	.word	0x91408000	!rd	%ccr,%o0
	cmp	%o0,0x99
	bne	.v8
	nop
	! Even though we do not use the upper FP register bank,
	! we wipe it as memcpy might have used it...
	.word	0xbfa00040	!fmovd	%f0,%f62
	.word	0xbba00040	!fmovd	%f0,%f60
	.word	0xb7a00040	!fmovd	%f0,%f58
	.word	0xb3a00040	!fmovd	%f0,%f56
	.word	0xafa00040	!fmovd	%f0,%f54
	.word	0xaba00040	!fmovd	%f0,%f52
	.word	0xa7a00040	!fmovd	%f0,%f50
	.word	0xa3a00040	!fmovd	%f0,%f48
	.word	0x9fa00040	!fmovd	%f0,%f46
	.word	0x9ba00040	!fmovd	%f0,%f44
	.word	0x97a00040	!fmovd	%f0,%f42
	.word	0x93a00040	!fmovd	%f0,%f40
	.word	0x8fa00040	!fmovd	%f0,%f38
	.word	0x8ba00040	!fmovd	%f0,%f36
	.word	0x87a00040	!fmovd	%f0,%f34
	.word	0x83a00040	!fmovd	%f0,%f32
	! Common tail: FP moves interleaved with integer clears.
.v8:	fmovs	%f1,%f31
	clr	%o0
	fmovs	%f0,%f30
	clr	%o1
	fmovs	%f1,%f29
	clr	%o2
	fmovs	%f0,%f28
	clr	%o3
	fmovs	%f1,%f27
	clr	%o4
	fmovs	%f0,%f26
	clr	%o5
	fmovs	%f1,%f25
	clr	%o7
	fmovs	%f0,%f24
	clr	%l0
	fmovs	%f1,%f23
	clr	%l1
	fmovs	%f0,%f22
	clr	%l2
	fmovs	%f1,%f21
	clr	%l3
	fmovs	%f0,%f20
	clr	%l4
	fmovs	%f1,%f19
	clr	%l5
	fmovs	%f0,%f18
	clr	%l6
	fmovs	%f1,%f17
	clr	%l7
	fmovs	%f0,%f16
	clr	%i0
	fmovs	%f1,%f15
	clr	%i1
	fmovs	%f0,%f14
	clr	%i2
	fmovs	%f1,%f13
	clr	%i3
	fmovs	%f0,%f12
	clr	%i4
	fmovs	%f1,%f11
	clr	%i5
	fmovs	%f0,%f10
	clr	%g1
	fmovs	%f1,%f9
	clr	%g2
	fmovs	%f0,%f8
	clr	%g3
	fmovs	%f1,%f7
	clr	%g4
	fmovs	%f0,%f6
	clr	%g5
	fmovs	%f1,%f5
	fmovs	%f0,%f4
	fmovs	%f1,%f3
	fmovs	%f0,%f2

	add	%fp,BIAS,%i0	! return pointer to the top of stack of the caller

	ret
	restore

	! Eight zero bytes used as the source for the FP wipe above.
.zero:	.long	0x0,0x0
	! Helper: turns the offset in %o0 into an absolute address by
	! adding the address of the calling instruction (%o7).
.PIC.zero.up:
	retl
	add	%o0,%o7,%o0
#ifdef DEBUG
.global	walk_reg_wins
.type	walk_reg_wins,#function
walk_reg_wins:
#endif
	! Helper: recurses through register windows, clearing the outs
	! and locals of each one on the way back.
.walk.reg.wins:
	save	%sp,FRAME,%sp
	cmp	%i7,%o7
	be	2f
	clr	%o0
	cmp	%o7,0		! compiler never cleans %o7...
	be	1f		! could have been a leaf function...
	clr	%o1
	call	.walk.reg.wins
	nop
1:	clr	%o2
	clr	%o3
	clr	%o4
	clr	%o5
	clr	%o7
	clr	%l0
	clr	%l1
	clr	%l2
	clr	%l3
	clr	%l4
	clr	%l5
	clr	%l6
	clr	%l7
	add	%o0,1,%i0	! used for debugging
2:	ret
	restore
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
174
.global	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,#function
.align	32
! OPENSSL_atomic_add
!
! In:  %o0 = address of a 32-bit word, %o1 = amount to add
! Out: %o0 = the new value, sign-extended
!
! A 32-bit build first tests for a V9 CPU at run time and, when the CPU
! is not V9, falls back to a spin lock built on swap. The V9 path is a
! compare-and-swap retry loop.
OPENSSL_atomic_add:
#ifndef ABI64
	subcc	%g0,1,%o2
	.word	0x95408000	!rd	%ccr,%o2, yields 0x99 on V9 only
	cmp	%o2,0x99
	be	.v9
	nop
	save	%sp,FRAME,%sp
	ba	.enter
	nop
#ifdef __sun
! Note that you do not have to link with libthread to call thr_yield,
! as libc provides a stub, which is overloaded the moment you link
! with *either* libpthread or libthread...
#define YIELD_CPU	thr_yield
#else
! applies at least to Linux and FreeBSD... Feedback expected...
#define YIELD_CPU	sched_yield
#endif
.spin:	call	YIELD_CPU
	nop
	! NOTE(review): this pre-check compares against -4096 while the
	! lock token written by swap below is -1 - confirm the intended
	! sentinel value.
.enter:	ld	[%i0],%i2
	cmp	%i2,-4096
	be	.spin
	mov	-1,%i2
	swap	[%i0],%i2	! take the lock, fetch the old value
	cmp	%i2,-1
	be	.spin		! somebody else holds the lock
	add	%i2,%i1,%i2	! delay slot: new value = old + amount
	stbar
	st	%i2,[%i0]	! publish the new value, releasing the lock
	sra	%i2,%g0,%i0
	ret
	restore
.v9:
#endif
	ld	[%o0],%o2
1:	add	%o1,%o2,%o3
	.word	0xd7e2100a	!cas	[%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3
	cmp	%o2,%o3
	bne	1b
	mov	%o3,%o2		! cas is always fetching to dest. register
	add	%o1,%o2,%o0	! OpenSSL expects the new value
	retl
	sra	%o0,%g0,%o0	! we return signed int, remember?
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add
224
.global	_sparcv9_rdtick
.align	32
! _sparcv9_rdtick
!
! On a V9 CPU: %o0 = the tick register, %o1 = %o0 shifted right by 32.
! On anything else: %o0 = 0 and %o1 = 0.
_sparcv9_rdtick:
	subcc	%g0,1,%o0
	.word	0x91408000	!rd	%ccr,%o0
	cmp	%o0,0x99	! 0x99 is what V9 reports here
	bne	.notick
	xor	%o0,%o0,%o0	! delay slot: %o0 = 0 on both paths
	.word	0x91410000	!rd	%tick,%o0
	retl
	.word	0x93323020	!srlx	%o0,32,%o1
.notick:
	retl
	xor	%o1,%o1,%o1
.type	_sparcv9_rdtick,#function
.size	_sparcv9_rdtick,.-_sparcv9_rdtick
c06b0f3d 241
4b2603e4
AP
.global	_sparcv9_vis1_probe
.align	8
! _sparcv9_vis1_probe
!
! Executes two VIS1 instructions: a 16-bit FP load through ASI_FP16_P
! from the stack area, then fxor to zero the loaded register pair.
! NOTE(review): presumably the caller catches the illegal-instruction
! signal when VIS1 is absent - confirm against the caller.
_sparcv9_vis1_probe:
	add	%sp,BIAS+2,%o1
	.word	0xc19a5a40	!ldda	[%o1]ASI_FP16_P,%f0
	retl
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
.type	_sparcv9_vis1_probe,#function
.size	_sparcv9_vis1_probe,.-_sparcv9_vis1_probe
251
7c5889bf
AP
! Probe and instrument VIS1 instruction. Output is number of cycles it
! takes to execute rdtick and pair of VIS1 instructions. US-Tx VIS unit
! is slow (documented to be 6 cycles on T2) and the core is in-order
! single-issue, so it should be possible to distinguish Tx reliably...
! Observed return values are:
!
!	UltraSPARC IIe		7
!	UltraSPARC III		7
!	UltraSPARC T1		24
!	SPARC T4		65(*)
!
! (*)	result has less to do with VIS instruction latencies, rdtick
!	appears that slow, but it does the trick in the sense that FP
!	and VIS code paths are still slower than integer-only ones.
!
! Numbers for T2 and SPARC64 V-VII are welcome.
!
! It would be possible to detect specifically US-T1 by instrumenting
! fmul8ulx16, which is emulated on T1 and as such accounts for quite
! a lot of %tick-s, a couple of thousand on Linux...
.global	_sparcv9_vis1_instrument
.align	8
_sparcv9_vis1_instrument:
	! warm-up pair, then five tick samples separated by fxor pairs
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x91410000	!rd	%tick,%o0
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x93410000	!rd	%tick,%o1
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x95410000	!rd	%tick,%o2
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x97410000	!rd	%tick,%o3
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x99410000	!rd	%tick,%o4

	! calculate intervals
	sub	%o1,%o0,%o0
	sub	%o2,%o1,%o1
	sub	%o3,%o2,%o2
	sub	%o4,%o3,%o3

	! find minimum value: each annulled branch lets the following
	! mov execute only when %o0 is the larger of the two (unsigned)
	cmp	%o0,%o1
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o1,%o0
	cmp	%o0,%o2
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o2,%o0
	cmp	%o0,%o3
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o3,%o0

	retl
	nop
.type	_sparcv9_vis1_instrument,#function
.size	_sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument
312
.global	_sparcv9_vis2_probe
.align	8
! _sparcv9_vis2_probe
!
! Executes a single VIS2 instruction (bshuffle) in the delay slot of
! the return.
_sparcv9_vis2_probe:
	retl
	.word	0x81b00980	!bshuffle	%f0,%f0,%f0
.type	_sparcv9_vis2_probe,#function
.size	_sparcv9_vis2_probe,.-_sparcv9_vis2_probe
320
.global	_sparcv9_fmadd_probe
.align	8
! _sparcv9_fmadd_probe
!
! Zeroes %f0 and %f2 with fxor, then executes one fused multiply-add
! (fmaddd) in the delay slot of the return.
_sparcv9_fmadd_probe:
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	retl
	.word	0x81b80440	!fmaddd	%f0,%f0,%f2,%f0
.type	_sparcv9_fmadd_probe,#function
.size	_sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe
c32fcca6 330
1fda639a
AP
.global	_sparcv9_rdcfr
.align	8
! _sparcv9_rdcfr
!
! Returns the contents of ancillary state register 26 in %o0; the read
! is performed in the delay slot of the return.
_sparcv9_rdcfr:
	retl
	.word	0x91468000	!rd	%asr26,%o0
.type	_sparcv9_rdcfr,#function
.size	_sparcv9_rdcfr,.-_sparcv9_rdcfr
338
.global	_sparcv9_vis3_probe
.align	8
! _sparcv9_vis3_probe
!
! Executes a single VIS3 instruction (xmulx on %g0, so no register
! changes) in the delay slot of the return.
_sparcv9_vis3_probe:
	retl
	.word	0x81b022a0	!xmulx	%g0,%g0,%g0
.type	_sparcv9_vis3_probe,#function
.size	_sparcv9_vis3_probe,.-_sparcv9_vis3_probe
346
.global	_sparcv9_random
.align	8
! _sparcv9_random
!
! Executes the "random" instruction in the delay slot of the return,
! leaving its result in %o0.
_sparcv9_random:
	retl
	.word	0x91b002a0	!random	%o0
.type	_sparcv9_random,#function
! The size is measured from the label of this routine. It was previously
! measured from _sparcv9_vis3_probe, which recorded a wrong symbol size.
.size	_sparcv9_random,.-_sparcv9_random
354
4400f6c6
AP
.global	_sparcv9_fjaesx_probe
.align	8
! _sparcv9_fjaesx_probe
!
! Executes a single faesencx instruction and returns.
_sparcv9_fjaesx_probe:
	.word	0x81b09206	!faesencx	%f2,%f6,%f0
	retl
	nop
! Mark the symbol as a function, as every other exported routine in
! this file does.
.type	_sparcv9_fjaesx_probe,#function
.size	_sparcv9_fjaesx_probe,.-_sparcv9_fjaesx_probe
362
b2dba9bf
AP
.global	OPENSSL_cleanse
.align	32
! OPENSSL_cleanse
!
! In: %o0 = address of the buffer, %o1 = number of bytes to zero
!
! Up to 14 bytes are cleared one byte at a time. Longer buffers are
! first byte-stepped to an aligned address, then cleared with 8-byte
! stores on V9 or 4-byte stores otherwise, and any remainder goes
! through the byte loop again.
OPENSSL_cleanse:
	cmp	%o1,14
	nop
#ifdef ABI64
	bgu	%xcc,.Lot
#else
	bgu	.Lot
#endif
	cmp	%o1,0		! delay slot: is there anything to do?
	bne	.Little
	nop
	retl
	nop

	! byte-at-a-time loop, %o1 is non-zero on entry
.Little:
	stb	%g0,[%o0]
	subcc	%o1,1,%o1
	bnz	.Little
	add	%o0,1,%o0
	retl
	nop
.align	32
.Lot:
#ifndef ABI64
	subcc	%g0,1,%g1
	! V9 reports 0x99 for the read below, V8 does not
	.word	0x83408000	!rd	%ccr,%g1
	cmp	%g1,0x99
	bne	.v8lot
	nop
#endif

	! step byte-wise until the address is 8-byte aligned
.v9lot:	andcc	%o0,7,%g0
	bz	.v9aligned
	nop
	stb	%g0,[%o0]
	sub	%o1,1,%o1
	ba	.v9lot
	add	%o0,1,%o0
.align	16,0x01000000
.v9aligned:
	.word	0xc0720000	!stx	%g0,[%o0]
	sub	%o1,8,%o1
	andcc	%o1,-8,%g0	! at least 8 bytes left?
#ifdef ABI64
	.word	0x126ffffd	!bnz	%xcc,.v9aligned
#else
	.word	0x124ffffd	!bnz	%icc,.v9aligned
#endif
	add	%o0,8,%o0

	cmp	%o1,0
	bne	.Little
	nop
	retl
	nop
#ifndef ABI64
	! step byte-wise until the address is 4-byte aligned
.v8lot:	andcc	%o0,3,%g0
	bz	.v8aligned
	nop
	stb	%g0,[%o0]
	sub	%o1,1,%o1
	ba	.v8lot
	add	%o0,1,%o0
	nop
.v8aligned:
	st	%g0,[%o0]
	sub	%o1,4,%o1
	andcc	%o1,-4,%g0	! at least 4 bytes left?
	bnz	.v8aligned
	add	%o0,4,%o0

	cmp	%o1,0
	bne	.Little
	nop
	retl
	nop
#endif
.type	OPENSSL_cleanse,#function
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
445
e33826f0
AP
.global	CRYPTO_memcmp
.align	16
! CRYPTO_memcmp
!
! In:  %o0, %o1 = addresses of the two buffers, %o2 = length in bytes
! Out: %o0 = 0 when all bytes match (or the length is zero), 1 otherwise
!
! Every byte is always visited: differences are accumulated with
! xor/or and the result is derived only after the loop, so there is no
! early exit on the first mismatch.
CRYPTO_memcmp:
	cmp	%o2,0
#ifdef ABI64
	beq,pn	%xcc,.Lno_data
#else
	beq	.Lno_data
#endif
	xor	%g1,%g1,%g1	! delay slot: accumulator = 0
	nop

.Loop_cmp:
	ldub	[%o0],%o3
	add	%o0,1,%o0
	ldub	[%o1],%o4
	add	%o1,1,%o1
	subcc	%o2,1,%o2
	xor	%o3,%o4,%o4	! non-zero when the bytes differ
#ifdef ABI64
	bnz	%xcc,.Loop_cmp
#else
	bnz	.Loop_cmp
#endif
	or	%o4,%g1,%g1	! delay slot: fold into the accumulator

	sub	%g0,%g1,%g1	! negate: bit 31 set when non-zero
	srl	%g1,31,%g1	! reduce to 0 or 1
.Lno_data:
	retl
	mov	%g1,%o0
.type	CRYPTO_memcmp,#function
.size	CRYPTO_memcmp,.-CRYPTO_memcmp
479
5fabb88a
AP
.global	_sparcv9_vis1_instrument_bus
.align	8
! _sparcv9_vis1_instrument_bus
!
! In:  %o0 = address of an array of 32-bit words, %o1 = cnt
! Out: %o0 = cnt
!
! For each of cnt words: the 64-byte block holding the word is
! block-loaded and block-stored back, then the tick difference since
! the previous iteration is added to the word with cas.
_sparcv9_vis1_instrument_bus:
	mov	%o1,%o3					! save cnt
	.word	0x99410000	!rd	%tick,%o4	! tick
	mov	%o4,%o5					! lasttick = tick
	set	0,%g4					! diff

	andn	%o0,63,%g1				! round down to a 64-byte boundary
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4

.Loop:	.word	0x99410000	!rd	%tick,%o4
	sub	%o4,%o5,%g4				! diff=tick-lasttick
	mov	%o4,%o5					! lasttick=tick

	andn	%o0,63,%g1
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4
	subcc	%o1,1,%o1				! --$cnt
	bnz	.Loop
	add	%o0,4,%o0				! ++$out

	retl
	mov	%o3,%o0
.type	_sparcv9_vis1_instrument_bus,#function
.size	_sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus
517
.global	_sparcv9_vis1_instrument_bus2
.align	8
! _sparcv9_vis1_instrument_bus2
!
! In:  %o0 = address of an array of 32-bit words, %o1 = cnt, %o2 = max
! Out: %o0 = cnt minus the number of words left unfilled
!
! Same probing sequence as _sparcv9_vis1_instrument_bus, except that the
! output pointer advances only when the measured tick difference is not
! equal to the previous one, and the loop gives up after max probes.
_sparcv9_vis1_instrument_bus2:
	mov	%o1,%o3					! save cnt
	sll	%o1,2,%o1				! cnt*=4

	.word	0x99410000	!rd	%tick,%o4	! tick
	mov	%o4,%o5					! lasttick = tick
	set	0,%g4					! diff

	andn	%o0,63,%g1				! round down to a 64-byte boundary
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4

	.word	0x99410000	!rd	%tick,%o4	! tick
	sub	%o4,%o5,%g4				! diff=tick-lasttick
	mov	%o4,%o5					! lasttick=tick
	mov	%g4,%g5					! lastdiff=diff
.Loop2:
	andn	%o0,63,%g1
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4

	subcc	%o2,1,%o2				! --max
	bz	.Ldone2
	nop

	.word	0x99410000	!rd	%tick,%o4	! tick
	sub	%o4,%o5,%g4				! diff=tick-lasttick
	mov	%o4,%o5					! lasttick=tick
	cmp	%g4,%g5
	mov	%g4,%g5					! lastdiff=diff

	! %g1 becomes 4 when diff differs from lastdiff and 0 otherwise
	.word	0x83408000	!rd	%ccr,%g1
	and	%g1,4,%g1				! isolate zero flag
	xor	%g1,4,%g1				! flip zero flag

	subcc	%o1,%g1,%o1				! conditional --$cnt
	bnz	.Loop2
	add	%o0,%g1,%o0				! conditional ++$out

.Ldone2:
	srl	%o1,2,%o1				! bytes left back to words left
	retl
	sub	%o3,%o1,%o0
.type	_sparcv9_vis1_instrument_bus2,#function
.size	_sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2
575
c06b0f3d
AP
! Fragment placed in the .init section: calls OPENSSL_cpuid_setup, with
! a nop filling the delay slot of the call.
.section	".init",#alloc,#execinstr
	call	OPENSSL_cpuid_setup
	nop