]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/sparccpuid.S
PR: 2840
[thirdparty/openssl.git] / crypto / sparccpuid.S
/*
 * Build configuration shared by every routine in this file.
 *
 * ABI64 is defined when the compiler reports a 64-bit SPARC target.
 * FRAME is the (negative) increment handed to "save" whenever a
 * routine opens a new stack frame. BIAS is the constant added to
 * %sp/%fp wherever one of them is turned into an address; 2047 is the
 * stack bias of the 64-bit SPARC V9 ABI, 0 is used otherwise.
 */
1 #ifdef OPENSSL_FIPSCANISTER
2 #include <openssl/fipssyms.h>
3 #endif
4
5 #if defined(__SUNPRO_C) && defined(__sparcv9)
6 # define ABI64 /* They've said -xarch=v9 at command line */
7 #elif defined(__GNUC__) && defined(__arch64__)
8 # define ABI64 /* They've said -m64 at command line */
9 #endif
10
11 #ifdef ABI64
/* 64-bit builds declare %g2 and %g3 as scratch registers. */
12 .register %g2,#scratch
13 .register %g3,#scratch
14 # define FRAME -192
15 # define BIAS 2047
16 #else
17 # define FRAME -96
18 # define BIAS 0
19 #endif
20
21 .text
22 .align 32
23 .global OPENSSL_wipe_cpu
24 .type OPENSSL_wipe_cpu,#function
25 ! Keep in mind that this does not excuse us from wiping the stack!
26 ! This routine wipes registers, but not the backing store [which
27 ! resides on the stack, toward lower addresses]. To facilitate for
28 ! stack wiping I return pointer to the top of stack of the *caller*.
/*
 * OPENSSL_wipe_cpu
 *   In:   no argument register is read.
 *   Out:  %o0 (as seen by the caller) = %fp + BIAS of the frame opened
 *         here, i.e. the caller's stack pointer as an address.
 *   Zeroes %o0-%o5, %o7, %l0-%l7, %i0-%i5, %g1-%g5 and %f0-%f31 of its
 *   own window; when the run-time V9 check succeeds it also zeroes
 *   %f32-%f62. %sp, %fp, %i7, %g6 and %g7 are never written.
 */
29 OPENSSL_wipe_cpu:
30 save %sp,FRAME,%sp
31 nop
/*
 * Scrub stale register windows: with __sun defined a system trap does
 * it, otherwise the recursive helper .walk.reg.wins below is called.
 */
32 #ifdef __sun
33 #include <sys/trap.h>
34 ta ST_CLEAN_WINDOWS
35 #else
36 call .walk.reg.wins
37 #endif
38 nop
/*
 * Position-independent address of .zero: the delay slot loads the
 * distance from the call instruction to .zero and .PIC.zero.up adds
 * %o7 (which the call set to its own address) to it.
 */
39 call .PIC.zero.up
40 mov .zero-(.-4),%o0
/* %f0 and %f1 both receive the zero word; every fmov below copies them. */
41 ld [%o0],%f0
42 ld [%o0],%f1
43
44 subcc %g0,1,%o0
45 ! Following is V9 "rd %ccr,%o0" instruction. However! V8
46 ! specification says that it ("rd %asr2,%o0" in V8 terms) does
47 ! not cause illegal_instruction trap. It therefore can be used
48 ! to determine if the CPU the code is executing on is V8- or
49 ! V9-compliant, as V9 returns a distinct value of 0x99,
50 ! "negative" and "borrow" bits set in both %icc and %xcc.
51 .word 0x91408000 !rd %ccr,%o0
52 cmp %o0,0x99
53 bne .v8
54 nop
55 ! Even though we do not use %fp register bank,
56 ! we wipe it as memcpy might have used it...
/* V9 only: sixteen hand-encoded double moves, from %f62 down to %f32. */
57 .word 0xbfa00040 !fmovd %f0,%f62
58 .word 0xbba00040 !...
59 .word 0xb7a00040
60 .word 0xb3a00040
61 .word 0xafa00040
62 .word 0xaba00040
63 .word 0xa7a00040
64 .word 0xa3a00040
65 .word 0x9fa00040
66 .word 0x9ba00040
67 .word 0x97a00040
68 .word 0x93a00040
69 .word 0x8fa00040
70 .word 0x8ba00040
71 .word 0x87a00040
72 .word 0x83a00040 !fmovd %f0,%f32
/*
 * Common tail for V8 and V9: single-precision moves zero %f31 down to
 * %f2, interleaved with the clears of the integer registers.
 */
73 .v8: fmovs %f1,%f31
74 clr %o0
75 fmovs %f0,%f30
76 clr %o1
77 fmovs %f1,%f29
78 clr %o2
79 fmovs %f0,%f28
80 clr %o3
81 fmovs %f1,%f27
82 clr %o4
83 fmovs %f0,%f26
84 clr %o5
85 fmovs %f1,%f25
86 clr %o7
87 fmovs %f0,%f24
88 clr %l0
89 fmovs %f1,%f23
90 clr %l1
91 fmovs %f0,%f22
92 clr %l2
93 fmovs %f1,%f21
94 clr %l3
95 fmovs %f0,%f20
96 clr %l4
97 fmovs %f1,%f19
98 clr %l5
99 fmovs %f0,%f18
100 clr %l6
101 fmovs %f1,%f17
102 clr %l7
103 fmovs %f0,%f16
104 clr %i0
105 fmovs %f1,%f15
106 clr %i1
107 fmovs %f0,%f14
108 clr %i2
109 fmovs %f1,%f13
110 clr %i3
111 fmovs %f0,%f12
112 clr %i4
113 fmovs %f1,%f11
114 clr %i5
115 fmovs %f0,%f10
116 clr %g1
117 fmovs %f1,%f9
118 clr %g2
119 fmovs %f0,%f8
120 clr %g3
121 fmovs %f1,%f7
122 clr %g4
123 fmovs %f0,%f6
124 clr %g5
125 fmovs %f1,%f5
126 fmovs %f0,%f4
127 fmovs %f1,%f3
128 fmovs %f0,%f2
129
130 add %fp,BIAS,%i0 ! return pointer to the top of stack of the caller
131
132 ret
133 restore
134
/* Two zero words kept in .text; the loads into %f0/%f1 above read them. */
135 .zero: .long 0x0,0x0
/* Leaf helper: %o0 = %o0 + %o7, turning the offset passed in into an address. */
136 .PIC.zero.up:
137 retl
138 add %o0,%o7,%o0
/*
 * .walk.reg.wins - recursive helper used when the clean-windows trap
 * is not compiled in. With DEBUG defined the same code is also
 * exported under the name walk_reg_wins.
 *
 * Each level opens a fresh window with "save" and then:
 *   - if the %o7 found in that window equals this level's %i7, it
 *     returns at once (label 2) without setting a result;
 *   - if that %o7 is zero, it skips the recursion (label 1);
 *   - otherwise it first calls itself.
 * %o0 and %o1 are cleared in the delay slots of the two branches, so
 * they are zeroed on every path. From label 1 on, %o2-%o5, %o7 and
 * %l0-%l7 are zeroed and %o0 + 1 is returned, where %o0 is either the
 * result of the nested call or zero: a depth count.
 */
139 #ifdef DEBUG
140 .global walk_reg_wins
141 .type walk_reg_wins,#function
142 walk_reg_wins:
143 #endif
144 .walk.reg.wins:
145 save %sp,FRAME,%sp
146 cmp %i7,%o7
147 be 2f
148 clr %o0
149 cmp %o7,0 ! compiler never cleans %o7...
150 be 1f ! could have been a leaf function...
151 clr %o1
152 call .walk.reg.wins
153 nop
154 1: clr %o2
155 clr %o3
156 clr %o4
157 clr %o5
158 clr %o7
159 clr %l0
160 clr %l1
161 clr %l2
162 clr %l3
163 clr %l4
164 clr %l5
165 clr %l6
166 clr %l7
167 add %o0,1,%i0 ! used for debugging
168 2: ret
169 restore
/* The recorded size of OPENSSL_wipe_cpu also spans the helpers above. */
170 .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
171
172 .global OPENSSL_atomic_add
173 .type OPENSSL_atomic_add,#function
174 .align 32
/*
 * OPENSSL_atomic_add
 *   In:   %o0 = address of a 32-bit word, %o1 = amount to add.
 *   Out:  %o0 = the value after the addition, passed through "sra"
 *         with a zero shift count so it is returned as a signed int.
 *
 * 64-bit builds contain only the compare-and-swap loop at the end.
 * 32-bit builds first run the "rd %ccr" check: a V9 CPU jumps to that
 * same loop, anything else takes the swap-based path in between.
 */
175 OPENSSL_atomic_add:
176 #ifndef ABI64
177 subcc %g0,1,%o2
178 .word 0x95408000 !rd %ccr,%o2, see comment above
179 cmp %o2,0x99
180 be .v9
181 nop
/* V8 path: open a frame, because YIELD_CPU may be called from here. */
182 save %sp,FRAME,%sp
183 ba .enter
184 nop
185 #ifdef __sun
186 ! Note that you do not have to link with libthread to call thr_yield,
187 ! as libc provides a stub, which is overloaded the moment you link
188 ! with *either* libpthread or libthread...
189 #define YIELD_CPU thr_yield
190 #else
191 ! applies at least to Linux and FreeBSD... Feedback expected...
192 #define YIELD_CPU sched_yield
193 #endif
/*
 * The word itself doubles as the lock: -1 is swapped in while an
 * update is in progress, and reading -1 back from the swap means
 * another thread holds it, so yield and retry.
 * NOTE(review): a current value of -4096 is also treated as "retry";
 * the reason is not visible in this file - confirm before changing.
 */
194 .spin: call YIELD_CPU
195 nop
196 .enter: ld [%i0],%i2
197 cmp %i2,-4096
198 be .spin
199 mov -1,%i2
200 swap [%i0],%i2
201 cmp %i2,-1
202 be .spin
203 add %i2,%i1,%i2
/* stbar orders the earlier stores before the store that publishes the sum. */
204 stbar
205 st %i2,[%i0]
206 sra %i2,%g0,%i0
207 ret
208 restore
209 .v9:
210 #endif
/*
 * V9 path: %o2 = expected value, %o3 = proposed new value. cas leaves
 * the value it found in memory in %o3, so %o2 == %o3 afterwards means
 * the store happened. The "mov" sits in the delay slot of a
 * non-annulled branch and therefore runs on both outcomes: it reloads
 * %o2 for the retry, and on success %o2 still holds the old value
 * from which the result is recomputed.
 */
211 ld [%o0],%o2
212 1: add %o1,%o2,%o3
213 .word 0xd7e2100a !cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3
214 cmp %o2,%o3
215 bne 1b
216 mov %o3,%o2 ! cas is always fetching to dest. register
217 add %o1,%o2,%o0 ! OpenSSL expects the new value
218 retl
219 sra %o0,%g0,%o0 ! we return signed int, remember?
220 .size OPENSSL_atomic_add,.-OPENSSL_atomic_add
221
222 .global _sparcv9_rdtick
223 .align 32
/*
 * _sparcv9_rdtick
 *   In:   nothing.
 *   Out:  when the "rd %ccr" check reports V9: %o0 = %tick and
 *         %o1 = %o0 shifted right by 32 bits;
 *         otherwise %o0 = 0 and %o1 = 0.
 * The xor in the delay slot of the non-annulled bne runs on both
 * paths; it is what supplies the zero in %o0 on the .notick path.
 */
224 _sparcv9_rdtick:
225 subcc %g0,1,%o0
226 .word 0x91408000 !rd %ccr,%o0
227 cmp %o0,0x99
228 bne .notick
229 xor %o0,%o0,%o0
230 .word 0x91410000 !rd %tick,%o0
231 retl
232 .word 0x93323020 !srlx %o0,32,%o1
233 .notick:
234 retl
235 xor %o1,%o1,%o1
236 .type _sparcv9_rdtick,#function
237 .size _sparcv9_rdtick,.-_sparcv9_rdtick
238
239 .global _sparcv9_vis1_probe
240 .align 8
/*
 * _sparcv9_vis1_probe
 *   Executes two hand-encoded instructions: an fxor that zeroes %f0
 *   and, in the delay slot of the return, an ldda with ASI_FP16_P
 *   from %sp + BIAS + 2. No result register is set.
 *   NOTE(review): presumably the caller only observes whether these
 *   instructions fault - confirm against the caller.
 */
241 _sparcv9_vis1_probe:
242 .word 0x81b00d80 !fxor %f0,%f0,%f0
243 add %sp,BIAS+2,%o1
244 retl
245 .word 0xc19a5a40 !ldda [%o1]ASI_FP16_P,%f0
246 .type _sparcv9_vis1_probe,#function
247 .size _sparcv9_vis1_probe,.-_sparcv9_vis1_probe
248
249 ! Probe and instrument VIS1 instruction. Output is number of cycles it
250 ! takes to execute rdtick and pair of VIS1 instructions. US-Tx VIS unit
251 ! is slow (documented to be 6 cycles on T2) and the core is in-order
252 ! single-issue, it should be possible to distinguish Tx reliably...
253 ! Observed return values are:
254 !
255 ! UltraSPARC IIe 7
256 ! UltraSPARC III 7
257 ! UltraSPARC T1 24
258 !
259 ! Numbers for T2 and SPARC64 V-VII are more than welcomed.
260 !
261 ! It would be possible to detect specifically US-T1 by instrumenting
262 ! fmul8ulx16, which is emulated on T1 and as such accounts for quite
263 ! a lot of %tick-s, couple of thousand on Linux...
264 .global _sparcv9_vis1_instrument
265 .align 8
/*
 * _sparcv9_vis1_instrument
 *   In:   nothing.
 *   Out:  %o0 = the smallest of four %tick differences, each measured
 *         across one pair of fxor instructions.
 *   Uses %o0-%o4 for the five %tick samples and overwrites %f0 and %f2.
 */
266 _sparcv9_vis1_instrument:
267 .word 0x91410000 !rd %tick,%o0
268 .word 0x81b00d80 !fxor %f0,%f0,%f0
269 .word 0x85b08d82 !fxor %f2,%f2,%f2
270 .word 0x93410000 !rd %tick,%o1
271 .word 0x81b00d80 !fxor %f0,%f0,%f0
272 .word 0x85b08d82 !fxor %f2,%f2,%f2
273 .word 0x95410000 !rd %tick,%o2
274 .word 0x81b00d80 !fxor %f0,%f0,%f0
275 .word 0x85b08d82 !fxor %f2,%f2,%f2
276 .word 0x97410000 !rd %tick,%o3
277 .word 0x81b00d80 !fxor %f0,%f0,%f0
278 .word 0x85b08d82 !fxor %f2,%f2,%f2
279 .word 0x99410000 !rd %tick,%o4
280
281 ! calculate intervals
282 sub %o1,%o0,%o0
283 sub %o2,%o1,%o1
284 sub %o3,%o2,%o2
285 sub %o4,%o3,%o3
286
287 ! find minimum value
/*
 * Each step is an annulled "bgu,a .+8": the mov in its delay slot is
 * executed only when the branch is taken, i.e. only when %o0 is
 * larger (unsigned) than the candidate, and the branch target is the
 * instruction right after that mov. The branches are encoded by hand,
 * so no instruction may be inserted between a branch and its mov.
 */
288 cmp %o0,%o1
289 .word 0x38680002 !bgu,a %xcc,.+8
290 mov %o1,%o0
291 cmp %o0,%o2
292 .word 0x38680002 !bgu,a %xcc,.+8
293 mov %o2,%o0
294 cmp %o0,%o3
295 .word 0x38680002 !bgu,a %xcc,.+8
296 mov %o3,%o0
297
298 retl
299 nop
300 .type _sparcv9_vis1_instrument,#function
301 .size _sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument
302
303 .global _sparcv9_vis2_probe
304 .align 8
/*
 * _sparcv9_vis2_probe
 *   Returns immediately; the only work is the hand-encoded bshuffle
 *   in the delay slot of the return. No result register is set.
 *   NOTE(review): presumably the caller only observes whether the
 *   instruction faults - confirm against the caller.
 */
305 _sparcv9_vis2_probe:
306 retl
307 .word 0x81b00980 !bshuffle %f0,%f0,%f0
308 .type _sparcv9_vis2_probe,#function
309 .size _sparcv9_vis2_probe,.-_sparcv9_vis2_probe
310
311 .global _sparcv9_fmadd_probe
312 .align 8
/*
 * _sparcv9_fmadd_probe
 *   Zeroes %f0 and %f2 with fxor, then executes a hand-encoded fmaddd
 *   in the delay slot of the return. No result register is set.
 *   NOTE(review): presumably the caller only observes whether the
 *   instruction faults - confirm against the caller.
 */
313 _sparcv9_fmadd_probe:
314 .word 0x81b00d80 !fxor %f0,%f0,%f0
315 .word 0x85b08d82 !fxor %f2,%f2,%f2
316 retl
317 .word 0x81b80440 !fmaddd %f0,%f0,%f2,%f0
318 .type _sparcv9_fmadd_probe,#function
319 .size _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe
320
321 .global OPENSSL_cleanse
322 .align 32
/*
 * OPENSSL_cleanse
 *   In:   %o0 = start of the buffer, %o1 = length in bytes.
 *   Out:  nothing; the buffer is overwritten with zero bytes.
 *   Modifies %o0, %o1, the condition codes and, in 32-bit builds, %g1.
 *
 * Lengths up to 14 are handled by the byte loop at .Little (a zero
 * length returns at once). Longer buffers go to .Lot, which stores
 * single bytes until the pointer is aligned, then whole words (8 bytes
 * on V9, 4 bytes on V8), and finally hands any remainder to .Little.
 */
323 OPENSSL_cleanse:
324 cmp %o1,14
325 nop
326 #ifdef ABI64
327 bgu %xcc,.Lot
328 #else
329 bgu .Lot
330 #endif
/* Delay slot of the bgu above; its flags feed the bne that follows. */
331 cmp %o1,0
332 bne .Little
333 nop
334 retl
335 nop
336
/* Byte loop; callers only enter it with a non-zero count in %o1. */
337 .Little:
338 stb %g0,[%o0]
339 subcc %o1,1,%o1
340 bnz .Little
341 add %o0,1,%o0
342 retl
343 nop
344 .align 32
345 .Lot:
346 #ifndef ABI64
/* 32-bit builds choose the word size at run time with the V9 check. */
347 subcc %g0,1,%g1
348 ! see above for explanation
349 .word 0x83408000 !rd %ccr,%g1
350 cmp %g1,0x99
351 bne .v8lot
352 nop
353 #endif
354
/* Store single bytes until %o0 is a multiple of 8. */
355 .v9lot: andcc %o0,7,%g0
356 bz .v9aligned
357 nop
358 stb %g0,[%o0]
359 sub %o1,1,%o1
360 ba .v9lot
361 add %o0,1,%o0
362 .align 16,0x01000000
/*
 * 8-byte store loop; it repeats while at least 8 bytes remain. The
 * closing branch is encoded by hand with its target fixed inside the
 * instruction word, so the number of instructions between .v9aligned
 * and that branch must not change.
 */
363 .v9aligned:
364 .word 0xc0720000 !stx %g0,[%o0]
365 sub %o1,8,%o1
366 andcc %o1,-8,%g0
367 #ifdef ABI64
368 .word 0x126ffffd !bnz %xcc,.v9aligned
369 #else
370 .word 0x124ffffd !bnz %icc,.v9aligned
371 #endif
372 add %o0,8,%o0
373
/* Fewer than 8 bytes are left: finish in the byte loop, if any remain. */
374 cmp %o1,0
375 bne .Little
376 nop
377 retl
378 nop
379 #ifndef ABI64
/* V8 variant of the same scheme with 4-byte alignment and 4-byte stores. */
380 .v8lot: andcc %o0,3,%g0
381 bz .v8aligned
382 nop
383 stb %g0,[%o0]
384 sub %o1,1,%o1
385 ba .v8lot
386 add %o0,1,%o0
387 nop
388 .v8aligned:
389 st %g0,[%o0]
390 sub %o1,4,%o1
391 andcc %o1,-4,%g0
392 bnz .v8aligned
393 add %o0,4,%o0
394
395 cmp %o1,0
396 bne .Little
397 nop
398 retl
399 nop
400 #endif
401 .type OPENSSL_cleanse,#function
402 .size OPENSSL_cleanse,.-OPENSSL_cleanse
403
404 .global _sparcv9_vis1_instrument_bus
405 .align 8
/*
 * _sparcv9_vis1_instrument_bus
 *   In:   %o0 = output pointer, advanced by 4 bytes per iteration,
 *         %o1 = cnt, the number of iterations.
 *   Out:  %o0 = the cnt that was passed in.
 *   Registers: %o3 = saved cnt, %o5 = previous %tick sample,
 *   %g4 = tick difference, %g1 = %o0 rounded down to a multiple of 64,
 *   %o4 = scratch. The block load also overwrites FP registers from
 *   %f0 upwards.
 *
 * Every pass does a block load and a block store on the 64-byte line
 * containing the current output word, each followed by membar #Sync,
 * and then adds the tick difference to that word with ld/add/cas.
 * The pass before .Loop runs with a difference of zero and does not
 * advance the output pointer.
 * NOTE(review): cnt is decremented before it is tested, so a cnt of
 * zero does not leave the loop at once - confirm callers never pass 0.
 */
406 _sparcv9_vis1_instrument_bus:
407 mov %o1,%o3 ! save cnt
408 .word 0x99410000 !rd %tick,%o4 ! tick
409 mov %o4,%o5 ! lasttick = tick
410 set 0,%g4 ! diff
411
412 andn %o0,63,%g1
413 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
414 .word 0x8143e040 !membar #Sync
415 .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
416 .word 0x8143e040 !membar #Sync
417 ld [%o0],%o4
418 add %o4,%g4,%g4
419 .word 0xc9e2100c !cas [%o0],%o4,%g4
420
421 .Loop: .word 0x99410000 !rd %tick,%o4
422 sub %o4,%o5,%g4 ! diff=tick-lasttick
423 mov %o4,%o5 ! lasttick=tick
424
425 andn %o0,63,%g1
426 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
427 .word 0x8143e040 !membar #Sync
428 .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
429 .word 0x8143e040 !membar #Sync
430 ld [%o0],%o4
431 add %o4,%g4,%g4
432 .word 0xc9e2100c !cas [%o0],%o4,%g4
433 subcc %o1,1,%o1 ! --$cnt
434 bnz .Loop
435 add %o0,4,%o0 ! ++$out
436
437 retl
438 mov %o3,%o0
439 .type _sparcv9_vis1_instrument_bus,#function
440 .size _sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus
441
442 .global _sparcv9_vis1_instrument_bus2
443 .align 8
/*
 * _sparcv9_vis1_instrument_bus2
 *   In:   %o0 = output pointer (4-byte slots), %o1 = cnt,
 *         %o2 = max, the upper bound on loop passes.
 *   Out:  %o0 = cnt minus the number of slots still unused, i.e. how
 *         many times the output pointer was advanced.
 *   Registers: %o3 = saved cnt, %o1 = remaining slots times 4,
 *   %o5 = previous %tick sample, %g4 = current difference,
 *   %g5 = previous difference, %g1 = scratch, %o4 = scratch. The block
 *   load also overwrites FP registers from %f0 upwards.
 *
 * Same memory sequence as _sparcv9_vis1_instrument_bus, but the output
 * pointer moves on only when the new tick difference is not equal to
 * the previous one; equal differences keep adding to the same slot.
 * The loop ends when the slots are used up or after max passes.
 */
444 _sparcv9_vis1_instrument_bus2:
445 mov %o1,%o3 ! save cnt
446 sll %o1,2,%o1 ! cnt*=4
447
448 .word 0x99410000 !rd %tick,%o4 ! tick
449 mov %o4,%o5 ! lasttick = tick
450 set 0,%g4 ! diff
451
452 andn %o0,63,%g1
453 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
454 .word 0x8143e040 !membar #Sync
455 .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
456 .word 0x8143e040 !membar #Sync
457 ld [%o0],%o4
458 add %o4,%g4,%g4
459 .word 0xc9e2100c !cas [%o0],%o4,%g4
460
461 .word 0x99410000 !rd %tick,%o4 ! tick
462 sub %o4,%o5,%g4 ! diff=tick-lasttick
463 mov %o4,%o5 ! lasttick=tick
464 mov %g4,%g5 ! lastdiff=diff
465 .Loop2:
466 andn %o0,63,%g1
467 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
468 .word 0x8143e040 !membar #Sync
469 .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
470 .word 0x8143e040 !membar #Sync
471 ld [%o0],%o4
472 add %o4,%g4,%g4
473 .word 0xc9e2100c !cas [%o0],%o4,%g4
474
475 subcc %o2,1,%o2 ! --max
476 bz .Ldone2
477 nop
478
479 .word 0x99410000 !rd %tick,%o4 ! tick
480 sub %o4,%o5,%g4 ! diff=tick-lasttick
481 mov %o4,%o5 ! lasttick=tick
/*
 * Branch-free step: the flags of "cmp diff,lastdiff" are read back
 * through %ccr (the mov in between leaves them alone). After the
 * and/xor pair %g1 is 4 when the two differences are not equal and 0
 * when they are equal, and that value is used both as the decrement
 * of the remaining count and as the pointer increment.
 */
482 cmp %g4,%g5
483 mov %g4,%g5 ! lastdiff=diff
484
485 .word 0x83408000 !rd %ccr,%g1
486 and %g1,4,%g1 ! isolate zero flag
487 xor %g1,4,%g1 ! flip zero flag
488
489 subcc %o1,%g1,%o1 ! conditional --$cnt
490 bnz .Loop2
491 add %o0,%g1,%o0 ! conditional ++$out
492
493 .Ldone2:
/* Convert the remaining byte count back to slots and subtract it from cnt. */
494 srl %o1,2,%o1
495 retl
496 sub %o3,%o1,%o0
497 .type _sparcv9_vis1_instrument_bus2,#function
498 .size _sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2
499
/*
 * Fragment placed in the ".init" section (allocated, executable): a
 * call to OPENSSL_cpuid_setup, which is defined outside this file,
 * followed by the nop that fills its delay slot.
 * NOTE(review): presumably this makes the setup routine run as part of
 * the object's initialisation code at load time - confirm for the
 * target platform.
 */
500 .section ".init",#alloc,#execinstr
501 call OPENSSL_cpuid_setup
502 nop