]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/sparccpuid.S
sparcv9cap.c: add SPARC-T4 feature detection.
[thirdparty/openssl.git] / crypto / sparccpuid.S
1 #ifdef OPENSSL_FIPSCANISTER
2 #include <openssl/fipssyms.h>
3 #endif
4
5 #if defined(__SUNPRO_C) && defined(__sparcv9)
6 # define ABI64 /* Sun compiler: -xarch=v9 was given on the command line */
7 #elif defined(__GNUC__) && defined(__arch64__)
8 # define ABI64 /* GCC: -m64 was given on the command line */
9 #endif
10
11 #ifdef ABI64
12 .register %g2,#scratch ! declare %g2 as scratch; it is cleared in OPENSSL_wipe_cpu
13 .register %g3,#scratch ! declare %g3 as scratch; it is cleared in OPENSSL_wipe_cpu
14 # define FRAME -192 /* stack adjustment passed to the save instructions below (64-bit build) */
15 # define BIAS 2047 /* offset added to %sp/%fp when forming stack addresses (64-bit build) */
16 #else
17 # define FRAME -96 /* stack adjustment passed to the save instructions below (32-bit build) */
18 # define BIAS 0 /* no offset is added to %sp/%fp in the 32-bit build */
19 #endif
20
21 .text
22 .align 32
23 .global OPENSSL_wipe_cpu
24 .type OPENSSL_wipe_cpu,#function
25 ! OPENSSL_wipe_cpu zeroes the integer and floating-point registers.
26 ! Keep in mind that this does not excuse us from wiping the stack!
27 ! Only registers are wiped, not their backing store [which resides on
28 ! the stack, toward lower addresses]. To facilitate stack wiping the
! routine returns, in %o0 of the caller, a pointer to the top of stack
! of the *caller*. The two helpers .PIC.zero.up and .walk.reg.wins are
! local to this routine and are covered by its .size directive.
29 OPENSSL_wipe_cpu:
30 save %sp,FRAME,%sp ! open a new register window and stack frame
31 nop
32 #ifdef __sun
33 #include <sys/trap.h>
34 ta ST_CLEAN_WINDOWS ! software trap; the trap number comes from sys/trap.h
35 #else
36 call .walk.reg.wins ! clear the other register windows by recursive descent
37 #endif
38 nop ! delay slot of the call in the non-__sun build
39 call .PIC.zero.up ! position-independent way to get the address of .zero
40 mov .zero-(.-4),%o0 ! delay slot: distance from the call instruction to .zero
41 ld [%o0],%f0 ! %f0 = 0
42 ld [%o0],%f1 ! %f1 = 0
43
44 subcc %g0,1,%o0 ! 0-1: sets the negative and borrow flags for the probe below
45 ! Following is V9 "rd %ccr,%o0" instruction. However! V8
46 ! specification says that it ("rd %asr2,%o0" in V8 terms) does
47 ! not cause illegal_instruction trap. It therefore can be used
48 ! to determine if the CPU the code is executing on is V8- or
49 ! V9-compliant, as V9 returns a distinct value of 0x99,
50 ! "negative" and "borrow" bits set in both %icc and %xcc.
51 .word 0x91408000 !rd %ccr,%o0
52 cmp %o0,0x99
53 bne .v8 ! not V9: skip the upper floating-point bank
54 nop
55 ! Even though we do not use %fp register bank,
56 ! we wipe it as memcpy might have used it...
57 .word 0xbfa00040 !fmovd %f0,%f62
58 .word 0xbba00040 !...
59 .word 0xb7a00040
60 .word 0xb3a00040
61 .word 0xafa00040
62 .word 0xaba00040
63 .word 0xa7a00040
64 .word 0xa3a00040
65 .word 0x9fa00040
66 .word 0x9ba00040
67 .word 0x97a00040
68 .word 0x93a00040
69 .word 0x8fa00040
70 .word 0x8ba00040
71 .word 0x87a00040
72 .word 0x83a00040 !fmovd %f0,%f32
73 .v8: fmovs %f1,%f31 ! reached on V8 and V9 alike: zero %f31..%f2, interleaved with integer clears
74 clr %o0
75 fmovs %f0,%f30
76 clr %o1
77 fmovs %f1,%f29
78 clr %o2
79 fmovs %f0,%f28
80 clr %o3
81 fmovs %f1,%f27
82 clr %o4
83 fmovs %f0,%f26
84 clr %o5
85 fmovs %f1,%f25
86 clr %o7
87 fmovs %f0,%f24
88 clr %l0
89 fmovs %f1,%f23
90 clr %l1
91 fmovs %f0,%f22
92 clr %l2
93 fmovs %f1,%f21
94 clr %l3
95 fmovs %f0,%f20
96 clr %l4
97 fmovs %f1,%f19
98 clr %l5
99 fmovs %f0,%f18
100 clr %l6
101 fmovs %f1,%f17
102 clr %l7
103 fmovs %f0,%f16
104 clr %i0
105 fmovs %f1,%f15
106 clr %i1
107 fmovs %f0,%f14
108 clr %i2
109 fmovs %f1,%f13
110 clr %i3
111 fmovs %f0,%f12
112 clr %i4
113 fmovs %f1,%f11
114 clr %i5
115 fmovs %f0,%f10
116 clr %g1
117 fmovs %f1,%f9
118 clr %g2
119 fmovs %f0,%f8
120 clr %g3
121 fmovs %f1,%f7
122 clr %g4
123 fmovs %f0,%f6
124 clr %g5
125 fmovs %f1,%f5
126 fmovs %f0,%f4
127 fmovs %f1,%f3
128 fmovs %f0,%f2
129
130 add %fp,BIAS,%i0 ! return pointer to the top of stack of the caller
131
132 ret
133 restore
134
135 .zero: .long 0x0,0x0 ! eight zero bytes, the source for the loads into %f0 and %f1
136 .PIC.zero.up: ! helper: %o0 = %o0 + %o7, where %o7 is the address of the calling instruction
137 retl
138 add %o0,%o7,%o0 ! delay slot: turn the relative offset into an absolute address
139 #ifdef DEBUG
140 .global walk_reg_wins
141 .type walk_reg_wins,#function
142 walk_reg_wins: ! DEBUG builds export the helper below under a global name
143 #endif
144 .walk.reg.wins: ! helper: recursively opens register windows and clears their out and local registers
145 save %sp,FRAME,%sp
146 cmp %i7,%o7 ! %i7 is our return address; %o7 holds whatever this window contained before
147 be 2f ! equal: stop descending and return 0
148 clr %o0 ! delay slot
149 cmp %o7,0 ! compiler never cleans %o7...
150 be 1f ! could have been a leaf function...
151 clr %o1 ! delay slot
152 call .walk.reg.wins ! descend one more window
153 nop
154 1: clr %o2
155 clr %o3
156 clr %o4
157 clr %o5
158 clr %o7
159 clr %l0
160 clr %l1
161 clr %l2
162 clr %l3
163 clr %l4
164 clr %l5
165 clr %l6
166 clr %l7
167 add %o0,1,%i0 ! used for debugging
168 2: ret
169 restore
170 .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
171
! OPENSSL_atomic_add: add %o1 to the 32-bit word at address %o0 and
! return the new value, sign-extended, in %o0.
! 64-bit builds always use the compare-and-swap loop at the end. 32-bit
! builds probe the CPU at run time: V9 CPUs take the same loop, others
! use the swap instruction with -1 as an "update in progress" marker,
! so a counter whose real value is -1 cannot be represented on that path.
172 .global OPENSSL_atomic_add
173 .type OPENSSL_atomic_add,#function
174 .align 32
175 OPENSSL_atomic_add:
176 #ifndef ABI64
177 subcc %g0,1,%o2 ! set the negative and borrow flags for the probe below
178 .word 0x95408000 !rd %ccr,%o2, see comment above
179 cmp %o2,0x99
180 be .v9 ! V9 CPU: use the compare-and-swap loop
181 nop
182 save %sp,FRAME,%sp ! this path may call out, so it needs its own window and frame
183 ba .enter
184 nop
185 #ifdef __sun
186 ! Note that you do not have to link with libthread to call thr_yield,
187 ! as libc provides a stub, which is overloaded the moment you link
188 ! with *either* libpthread or libthread...
189 #define YIELD_CPU thr_yield
190 #else
191 ! applies at least to Linux and FreeBSD... Feedback expected...
192 #define YIELD_CPU sched_yield
193 #endif
194 .spin: call YIELD_CPU
195 nop
196 .enter: ld [%i0],%i2 ! peek at the current value
197 cmp %i2,-4096 ! NOTE(review): the pre-check uses -4096 while the marker swapped in below is -1; confirm this is intended
198 be .spin
199 mov -1,%i2 ! delay slot: marker value to swap in
200 swap [%i0],%i2 ! atomically fetch the value and leave the marker behind
201 cmp %i2,-1 ! fetched the marker: somebody else is in the middle of an update
202 be .spin
203 add %i2,%i1,%i2 ! delay slot: new value = old value + amount
204 stbar
205 st %i2,[%i0] ! store the new value over the marker
206 sra %i2,%g0,%i0 ! sign-extend the 32-bit result into the return register
207 ret
208 restore
209 .v9:
210 #endif
211 ld [%o0],%o2 ! %o2 = expected old value
212 1: add %o1,%o2,%o3 ! %o3 = candidate new value
213 .word 0xd7e2100a !cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3
214 cmp %o2,%o3 ! %o3 now holds what was in memory; equal means the swap happened
215 bne 1b ! memory changed under us: retry
216 mov %o3,%o2 ! cas is always fetching to dest. register
217 add %o1,%o2,%o0 ! OpenSSL expects the new value
218 retl
219 sra %o0,%g0,%o0 ! we return signed int, remember?
220 .size OPENSSL_atomic_add,.-OPENSSL_atomic_add
221
! _sparcv9_rdtick: return the %tick counter in %o0. When the %ccr probe
! does not report a V9 CPU the routine returns 0 instead. %o1 is set to
! the counter shifted right by 32 bits (0 on the fallback path).
222 .global _sparcv9_rdtick
223 .align 32
224 _sparcv9_rdtick:
225 subcc %g0,1,%o0 ! set the negative and borrow flags for the probe below
226 .word 0x91408000 !rd %ccr,%o0
227 cmp %o0,0x99 ! 0x99 is what a V9 CPU reports after the subcc above
228 bne .notick
229 xor %o0,%o0,%o0 ! delay slot, always executed: %o0 = 0
230 .word 0x91410000 !rd %tick,%o0
231 retl
232 .word 0x93323020 !srlx %o0,32,%o1
233 .notick:
234 retl
235 xor %o1,%o1,%o1 ! delay slot: %o1 = 0
236 .type _sparcv9_rdtick,#function
237 .size _sparcv9_rdtick,.-_sparcv9_rdtick
238
! _sparcv9_vis1_probe: execute two VIS1 instructions (fxor and a short
! floating-point load through ASI_FP16_P) and return.
! NOTE(review): presumably the caller detects missing VIS1 support by
! catching the resulting trap; confirm against the C caller.
239 .global _sparcv9_vis1_probe
240 .align 8
241 _sparcv9_vis1_probe:
242 .word 0x81b00d80 !fxor %f0,%f0,%f0
243 add %sp,BIAS+2,%o1 ! address inside the current stack frame for the load below
244 retl
245 .word 0xc19a5a40 !ldda [%o1]ASI_FP16_P,%f0
246 .type _sparcv9_vis1_probe,#function
247 .size _sparcv9_vis1_probe,.-_sparcv9_vis1_probe
248
249 ! Probe and instrument VIS1 instruction. Output is number of cycles it
250 ! takes to execute rdtick and pair of VIS1 instructions. US-Tx VIS unit
251 ! is slow (documented to be 6 cycles on T2) and the core is in-order
252 ! single-issue, it should be possible to distinguish Tx reliably...
253 ! Observed return values are:
254 !
255 ! UltraSPARC IIe 7
256 ! UltraSPARC III 7
257 ! UltraSPARC T1 24
258 ! SPARC T4 65(*)
259 !
260 ! (*) result has lesser to do with VIS instruction latencies, rdtick
261 ! appears that slow, but it does the trick in sense that FP and
262 ! VIS code paths are still slower than integer-only ones.
263 !
264 ! Numbers for T2 and SPARC64 V-VII are more than welcomed.
265 !
266 ! It would be possible to detect specifically US-T1 by instrumenting
267 ! fmul8ulx16, which is emulated on T1 and as such accounts for quite
268 ! a lot of %tick-s, couple of thousand on Linux...
!
! The routine takes five %tick samples with a pair of fxor instructions
! between consecutive samples and returns the smallest of the four
! resulting intervals in %o0.
269 .global _sparcv9_vis1_instrument
270 .align 8
271 _sparcv9_vis1_instrument:
272 .word 0x81b00d80 !fxor %f0,%f0,%f0
273 .word 0x85b08d82 !fxor %f2,%f2,%f2
274 .word 0x91410000 !rd %tick,%o0
275 .word 0x81b00d80 !fxor %f0,%f0,%f0
276 .word 0x85b08d82 !fxor %f2,%f2,%f2
277 .word 0x93410000 !rd %tick,%o1
278 .word 0x81b00d80 !fxor %f0,%f0,%f0
279 .word 0x85b08d82 !fxor %f2,%f2,%f2
280 .word 0x95410000 !rd %tick,%o2
281 .word 0x81b00d80 !fxor %f0,%f0,%f0
282 .word 0x85b08d82 !fxor %f2,%f2,%f2
283 .word 0x97410000 !rd %tick,%o3
284 .word 0x81b00d80 !fxor %f0,%f0,%f0
285 .word 0x85b08d82 !fxor %f2,%f2,%f2
286 .word 0x99410000 !rd %tick,%o4
287
288 ! calculate intervals
289 sub %o1,%o0,%o0
290 sub %o2,%o1,%o1
291 sub %o3,%o2,%o2
292 sub %o4,%o3,%o3
293
294 ! find minimum value: each annulled branch lets the mov in its delay
! slot execute only when the current minimum in %o0 is larger (unsigned)
295 cmp %o0,%o1
296 .word 0x38680002 !bgu,a %xcc,.+8
297 mov %o1,%o0
298 cmp %o0,%o2
299 .word 0x38680002 !bgu,a %xcc,.+8
300 mov %o2,%o0
301 cmp %o0,%o3
302 .word 0x38680002 !bgu,a %xcc,.+8
303 mov %o3,%o0
304
305 retl
306 nop
307 .type _sparcv9_vis1_instrument,#function
308 .size _sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument
309
! _sparcv9_vis2_probe: execute one VIS2 instruction (bshuffle) and return.
! NOTE(review): presumably the caller detects missing VIS2 support by
! catching the resulting trap; confirm against the C caller.
310 .global _sparcv9_vis2_probe
311 .align 8
312 _sparcv9_vis2_probe:
313 retl
314 .word 0x81b00980 !bshuffle %f0,%f0,%f0
315 .type _sparcv9_vis2_probe,#function
316 .size _sparcv9_vis2_probe,.-_sparcv9_vis2_probe
317
! _sparcv9_fmadd_probe: clear %f0 and %f2 with fxor, then execute one
! fused multiply-add (fmaddd) in the delay slot of the return.
! NOTE(review): presumably the caller detects a missing fmaddd by
! catching the resulting trap; confirm against the C caller.
318 .global _sparcv9_fmadd_probe
319 .align 8
320 _sparcv9_fmadd_probe:
321 .word 0x81b00d80 !fxor %f0,%f0,%f0
322 .word 0x85b08d82 !fxor %f2,%f2,%f2
323 retl
324 .word 0x81b80440 !fmaddd %f0,%f0,%f2,%f0
325 .type _sparcv9_fmadd_probe,#function
326 .size _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe
327
! _sparcv9_rdcfr: return the contents of ancillary state register 26
! in %o0. NOTE(review): going by the routine name this is the CFR
! feature register used for SPARC T4 detection; confirm.
328 .global _sparcv9_rdcfr
329 .align 8
330 _sparcv9_rdcfr:
331 retl
332 .word 0x91468000 !rd %asr26,%o0
333 .type _sparcv9_rdcfr,#function
334 .size _sparcv9_rdcfr,.-_sparcv9_rdcfr
335
! _sparcv9_vis3_probe: execute one xmulx instruction whose operands and
! destination are all %g0, so no register is modified, and return.
! NOTE(review): presumably the caller detects missing VIS3 support by
! catching the resulting trap; confirm against the C caller.
336 .global _sparcv9_vis3_probe
337 .align 8
338 _sparcv9_vis3_probe:
339 retl
340 .word 0x81b022a0 !xmulx %g0,%g0,%g0
341 .type _sparcv9_vis3_probe,#function
342 .size _sparcv9_vis3_probe,.-_sparcv9_vis3_probe
343
! _sparcv9_random: execute the hand-encoded "random %o0" instruction in
! the delay slot of the return, leaving its result in %o0.
! NOTE(review): presumably only usable on CPUs that implement this
! instruction; confirm how the C caller guards the call.
344 .global _sparcv9_random
345 .align 8
346 _sparcv9_random:
347 retl
348 .word 0x91b002a0 !random %o0
349 .type _sparcv9_random,#function
350 .size _sparcv9_random,.-_sparcv9_random
351
! OPENSSL_cleanse: store zero into %o1 bytes starting at address %o0.
! Lengths of 14 or less are cleared one byte at a time. Longer buffers
! are first cleared bytewise up to an aligned address, then with 8-byte
! stores (V9) or 4-byte stores (V8, 32-bit builds only), and any tail is
! again cleared one byte at a time.
352 .global OPENSSL_cleanse
353 .align 32
354 OPENSSL_cleanse:
355 cmp %o1,14
356 nop
357 #ifdef ABI64
358 bgu %xcc,.Lot
359 #else
360 bgu .Lot
361 #endif
362 cmp %o1,0 ! delay slot: test for zero length
363 bne .Little
364 nop
365 retl ! zero length: nothing to do
366 nop
367
368 .Little: ! byte loop; entered only with a non-zero count in %o1
369 stb %g0,[%o0]
370 subcc %o1,1,%o1
371 bnz .Little
372 add %o0,1,%o0 ! delay slot: advance the pointer
373 retl
374 nop
375 .align 32
376 .Lot:
377 #ifndef ABI64
378 subcc %g0,1,%g1
379 ! see above for explanation
380 .word 0x83408000 !rd %ccr,%g1
381 cmp %g1,0x99
382 bne .v8lot ! not a V9 CPU: use 4-byte stores
383 nop
384 #endif
385
386 .v9lot: andcc %o0,7,%g0 ! is the pointer 8-byte aligned yet?
387 bz .v9aligned
388 nop
389 stb %g0,[%o0]
390 sub %o1,1,%o1
391 ba .v9lot
392 add %o0,1,%o0 ! delay slot: advance the pointer
393 .align 16,0x01000000
394 .v9aligned:
395 .word 0xc0720000 !stx %g0,[%o0]
396 sub %o1,8,%o1
397 andcc %o1,-8,%g0 ! are at least 8 bytes left?
398 #ifdef ABI64
399 .word 0x126ffffd !bnz %xcc,.v9aligned
400 #else
401 .word 0x124ffffd !bnz %icc,.v9aligned
402 #endif
403 add %o0,8,%o0 ! delay slot: advance the pointer
404
405 cmp %o1,0 ! any tail bytes left?
406 bne .Little
407 nop
408 retl
409 nop
410 #ifndef ABI64
411 .v8lot: andcc %o0,3,%g0 ! is the pointer 4-byte aligned yet?
412 bz .v8aligned
413 nop
414 stb %g0,[%o0]
415 sub %o1,1,%o1
416 ba .v8lot
417 add %o0,1,%o0 ! delay slot: advance the pointer
418 nop
419 .v8aligned:
420 st %g0,[%o0]
421 sub %o1,4,%o1
422 andcc %o1,-4,%g0 ! are at least 4 bytes left?
423 bnz .v8aligned
424 add %o0,4,%o0 ! delay slot: advance the pointer
425
426 cmp %o1,0 ! any tail bytes left?
427 bne .Little
428 nop
429 retl
430 nop
431 #endif
432 .type OPENSSL_cleanse,#function
433 .size OPENSSL_cleanse,.-OPENSSL_cleanse
434
! _sparcv9_vis1_instrument_bus: %o0 = out, a pointer advanced in 4-byte
! steps; %o1 = cnt. For each of cnt words the routine reads %tick, does
! a block load and a block store with commit on the 64-byte line that
! contains the word, and adds the %tick difference since the previous
! sample to the word with cas. Returns the original cnt in %o0.
! The sequence before .Loop runs the same memory operations once with
! diff = 0.
435 .global _sparcv9_vis1_instrument_bus
436 .align 8
437 _sparcv9_vis1_instrument_bus:
438 mov %o1,%o3 ! save cnt
439 .word 0x99410000 !rd %tick,%o4 ! tick
440 mov %o4,%o5 ! lasttick = tick
441 set 0,%g4 ! diff
442
443 andn %o0,63,%g1 ! round out down to a 64-byte boundary
444 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
445 .word 0x8143e040 !membar #Sync
446 .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
447 .word 0x8143e040 !membar #Sync
448 ld [%o0],%o4
449 add %o4,%g4,%g4
450 .word 0xc9e2100c !cas [%o0],%o4,%g4
451
452 .Loop: .word 0x99410000 !rd %tick,%o4
453 sub %o4,%o5,%g4 ! diff=tick-lasttick
454 mov %o4,%o5 ! lasttick=tick
455
456 andn %o0,63,%g1 ! round out down to a 64-byte boundary
457 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
458 .word 0x8143e040 !membar #Sync
459 .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
460 .word 0x8143e040 !membar #Sync
461 ld [%o0],%o4 ! old = *out
462 add %o4,%g4,%g4 ! new = old + diff
463 .word 0xc9e2100c !cas [%o0],%o4,%g4
464 subcc %o1,1,%o1 ! --$cnt
465 bnz .Loop
466 add %o0,4,%o0 ! ++$out
467
468 retl
469 mov %o3,%o0 ! delay slot: return the saved cnt
470 .type _sparcv9_vis1_instrument_bus,#function
471 .size _sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus
472
! _sparcv9_vis1_instrument_bus2: %o0 = out, a pointer advanced in 4-byte
! steps; %o1 = cnt; %o2 = max, an upper bound on loop iterations.
! Like _sparcv9_vis1_instrument_bus, but out and cnt only advance when
! the new %tick difference differs from the previous one; otherwise the
! next difference is added to the same word. Returns in %o0 the original
! cnt minus the count still outstanding when the loop ended.
473 .global _sparcv9_vis1_instrument_bus2
474 .align 8
475 _sparcv9_vis1_instrument_bus2:
476 mov %o1,%o3 ! save cnt
477 sll %o1,2,%o1 ! cnt*=4
478
479 .word 0x99410000 !rd %tick,%o4 ! tick
480 mov %o4,%o5 ! lasttick = tick
481 set 0,%g4 ! diff
482
483 andn %o0,63,%g1 ! round out down to a 64-byte boundary
484 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
485 .word 0x8143e040 !membar #Sync
486 .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
487 .word 0x8143e040 !membar #Sync
488 ld [%o0],%o4
489 add %o4,%g4,%g4
490 .word 0xc9e2100c !cas [%o0],%o4,%g4
491
492 .word 0x99410000 !rd %tick,%o4 ! tick
493 sub %o4,%o5,%g4 ! diff=tick-lasttick
494 mov %o4,%o5 ! lasttick=tick
495 mov %g4,%g5 ! lastdiff=diff
496 .Loop2:
497 andn %o0,63,%g1 ! round out down to a 64-byte boundary
498 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
499 .word 0x8143e040 !membar #Sync
500 .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
501 .word 0x8143e040 !membar #Sync
502 ld [%o0],%o4 ! old = *out
503 add %o4,%g4,%g4 ! new = old + diff
504 .word 0xc9e2100c !cas [%o0],%o4,%g4
505
506 subcc %o2,1,%o2 ! --max
507 bz .Ldone2
508 nop
509
510 .word 0x99410000 !rd %tick,%o4 ! tick
511 sub %o4,%o5,%g4 ! diff=tick-lasttick
512 mov %o4,%o5 ! lasttick=tick
513 cmp %g4,%g5 ! sets the zero flag when diff equals lastdiff
514 mov %g4,%g5 ! lastdiff=diff
515
516 .word 0x83408000 !rd %ccr,%g1
517 and %g1,4,%g1 ! isolate zero flag
518 xor %g1,4,%g1 ! flip zero flag
519
520 subcc %o1,%g1,%o1 ! conditional --$cnt
521 bnz .Loop2
522 add %o0,%g1,%o0 ! conditional ++$out
523
524 .Ldone2:
525 srl %o1,2,%o1 ! undo the scaling by 4: number of words not yet completed
526 retl
527 sub %o3,%o1,%o0 ! delay slot: return original cnt minus the remainder
528 .type _sparcv9_vis1_instrument_bus2,#function
529 .size _sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2
530
! Emit a call to OPENSSL_cpuid_setup into the .init section, so that it
! runs as part of the initialization code of the containing object.
531 .section ".init",#alloc,#execinstr
532 call OPENSSL_cpuid_setup
533 nop ! delay slot of the call