]>
Commit | Line | Data |
---|---|---|
44c8a5e2 RS |
1 | ! Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved. |
2 | ! | |
0e9725bc | 3 | ! Licensed under the Apache License 2.0 (the "License"). You may not use |
44c8a5e2 RS |
4 | ! this file except in compliance with the License. You can obtain a copy |
5 | ! in the file LICENSE in the source distribution or at | |
6 | ! https://www.openssl.org/source/license.html | |
7 | ||
/*
 * ABI selection.  ABI64 is defined when the compiler was asked for 64-bit
 * code: Sun Studio with -xarch=v9 (__SUNPRO_C && __sparcv9) or GCC with
 * -m64 (__GNUC__ && __arch64__).
 */
#if (defined(__SUNPRO_C) && defined(__sparcv9)) || \
    (defined(__GNUC__) && defined(__arch64__))
# define ABI64
#endif

/*
 * FRAME is the stack adjustment passed to `save`; BIAS is the constant
 * added to %sp/%fp whenever this file turns them into an address.
 */
#if defined(ABI64)
	.register	%g2,#scratch
	.register	%g3,#scratch
# define	FRAME	-192
# define	BIAS	2047
#else
# define	FRAME	-96
# define	BIAS	0
#endif
23 | ||
.text
.align	32
.global	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,#function
!-----------------------------------------------------------------------
! OPENSSL_wipe_cpu
!
! Zeroes the integer registers of the current window (%o, %l, %i), the
! globals %g1-%g5 and the floating-point registers.
!
! Out:	%o0 (caller's view) = %fp+BIAS, i.e. the caller's top of stack.
!
! NOTE: only registers are wiped here.  The register-window backing
! store lives on the stack (toward lower addresses) and is NOT cleared,
! so the caller remains responsible for wiping the stack; the returned
! pointer is provided to make that easier.
!-----------------------------------------------------------------------
OPENSSL_wipe_cpu:
	save	%sp,FRAME,%sp
	nop
#ifdef __sun
#include <sys/trap.h>
	ta	ST_CLEAN_WINDOWS
#else
	call	.walk.reg.wins
#endif
	nop
	call	.PIC.zero.up		! returns the address of .zero in %o0
	mov	.zero-(.-4),%o0		! delay slot: offset of .zero from the call
	ld	[%o0],%f0		! %f0 = 0
	ld	[%o0],%f1		! %f1 = 0

	subcc	%g0,1,%o0		! set "negative" and "borrow" flags
	! The next word is the V9 "rd %ccr,%o0" instruction.  In V8 terms it
	! is "rd %asr2,%o0", which the V8 specification says does not raise
	! illegal_instruction.  It can therefore be used to tell V8 from V9
	! at run time: only V9 returns the distinct value 0x99 ("negative"
	! and "borrow" bits set in both %icc and %xcc).
	.word	0x91408000	!rd	%ccr,%o0
	cmp	%o0,0x99
	bne	.v8
	nop
	! V9 only: the upper FP bank is not used by this code, but it is
	! wiped anyway because memcpy might have used it.
	.word	0xbfa00040	!fmovd	%f0,%f62
	.word	0xbba00040	!...
	.word	0xb7a00040
	.word	0xb3a00040
	.word	0xafa00040
	.word	0xaba00040
	.word	0xa7a00040
	.word	0xa3a00040
	.word	0x9fa00040
	.word	0x9ba00040
	.word	0x97a00040
	.word	0x93a00040
	.word	0x8fa00040
	.word	0x8ba00040
	.word	0x87a00040
	.word	0x83a00040	!fmovd	%f0,%f32
	! Common path: clear %f2-%f31 (copying the zeroed %f0/%f1) interleaved
	! with clearing the integer registers.
.v8:	fmovs	%f1,%f31
	clr	%o0
	fmovs	%f0,%f30
	clr	%o1
	fmovs	%f1,%f29
	clr	%o2
	fmovs	%f0,%f28
	clr	%o3
	fmovs	%f1,%f27
	clr	%o4
	fmovs	%f0,%f26
	clr	%o5
	fmovs	%f1,%f25
	clr	%o7
	fmovs	%f0,%f24
	clr	%l0
	fmovs	%f1,%f23
	clr	%l1
	fmovs	%f0,%f22
	clr	%l2
	fmovs	%f1,%f21
	clr	%l3
	fmovs	%f0,%f20
	clr	%l4
	fmovs	%f1,%f19
	clr	%l5
	fmovs	%f0,%f18
	clr	%l6
	fmovs	%f1,%f17
	clr	%l7
	fmovs	%f0,%f16
	clr	%i0
	fmovs	%f1,%f15
	clr	%i1
	fmovs	%f0,%f14
	clr	%i2
	fmovs	%f1,%f13
	clr	%i3
	fmovs	%f0,%f12
	clr	%i4
	fmovs	%f1,%f11
	clr	%i5
	fmovs	%f0,%f10
	clr	%g1
	fmovs	%f1,%f9
	clr	%g2
	fmovs	%f0,%f8
	clr	%g3
	fmovs	%f1,%f7
	clr	%g4
	fmovs	%f0,%f6
	clr	%g5
	fmovs	%f1,%f5
	fmovs	%f0,%f4
	fmovs	%f1,%f3
	fmovs	%f0,%f2

	add	%fp,BIAS,%i0	! return pointer to caller's top of stack

	ret
	restore

! Eight zero bytes, used as the source for clearing %f0/%f1.
.zero:	.long	0x0,0x0

! Position-independent address helper.
! In:	%o0 = offset of the target relative to the calling `call`
!	%o7 = address of that `call` (written by the call itself)
! Out:	%o0 = absolute address of the target
.PIC.zero.up:
	retl
	add	%o0,%o7,%o0
#ifdef DEBUG
.global	walk_reg_wins
.type	walk_reg_wins,#function
walk_reg_wins:
#endif
! Recursive helper used when ST_CLEAN_WINDOWS is not available: each
! level opens a new register window with `save`, recurses unless one of
! the two termination tests below fires, and clears that window's %o
! and %l registers on the way back.
.walk.reg.wins:
	save	%sp,FRAME,%sp
	cmp	%i7,%o7
	be	2f
	clr	%o0
	cmp	%o7,0		! compiler never cleans %o7...
	be	1f		! could have been a leaf function...
	clr	%o1
	call	.walk.reg.wins
	nop
1:	clr	%o2
	clr	%o3
	clr	%o4
	clr	%o5
	clr	%o7
	clr	%l0
	clr	%l1
	clr	%l2
	clr	%l3
	clr	%l4
	clr	%l5
	clr	%l6
	clr	%l7
	add	%o0,1,%i0	! used for debugging
2:	ret
	restore
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
174 | ||
.global	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,#function
.align	32
!-----------------------------------------------------------------------
! OPENSSL_atomic_add
!
! In:	%o0 = address of a 32-bit word
!	%o1 = value to add
! Out:	%o0 = new value of the word, sign-extended
!
! 64-bit ABI builds always use the V9 `cas` loop.  32-bit builds probe
! the CPU at run time (rd %ccr reads back 0x99 only on V9) and fall back
! to a `swap`-based spin lock on V8, where the value -1 stored in the
! word itself marks it as locked.
!-----------------------------------------------------------------------
OPENSSL_atomic_add:
#ifndef ABI64
	subcc	%g0,1,%o2
	.word	0x95408000	!rd	%ccr,%o2, 0x99 only on V9
	cmp	%o2,0x99
	be	.v9
	nop
	save	%sp,FRAME,%sp
	ba	.enter
	nop
#ifdef __sun
! Note that you do not have to link with libthread to call thr_yield,
! as libc provides a stub, which is overloaded the moment you link
! with *either* libpthread or libthread...
#define	YIELD_CPU	thr_yield
#else
! applies at least to Linux and FreeBSD... Feedback expected...
#define	YIELD_CPU	sched_yield
#endif
.spin:	call	YIELD_CPU
	nop
.enter:	ld	[%i0],%i2
	! Cheap pre-check before the bus-locking swap.  It must test for the
	! same sentinel the swap below installs (-1); the previous -4096 never
	! matched a held lock and made a legitimate value of -4096 spin until
	! some other thread changed the word.
	cmp	%i2,-1
	be	.spin
	mov	-1,%i2		! delay slot: sentinel to swap in
	swap	[%i0],%i2	! grab the word, leave -1 ("locked") behind
	cmp	%i2,-1
	be	.spin		! somebody else holds it, yield and retry
	add	%i2,%i1,%i2	! delay slot: new value = old + addend
	stbar
	st	%i2,[%i0]	! publish the new value, which also unlocks
	sra	%i2,%g0,%i0	! return it sign-extended
	ret
	restore
.v9:
#endif
	ld	[%o0],%o2
1:	add	%o1,%o2,%o3
	.word	0xd7e2100a	!cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3
	cmp	%o2,%o3
	bne	1b
	mov	%o3,%o2		! cas is always fetching to dest. register
	add	%o1,%o2,%o0	! OpenSSL expects the new value
	retl
	sra	%o0,%g0,%o0	! we return signed int, remember?
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add
224 | ||
.global	_sparcv9_rdtick
.type	_sparcv9_rdtick,#function
.align	32
!-----------------------------------------------------------------------
! _sparcv9_rdtick
!
! Reads the %tick counter when running on a V9 CPU.
! Out:	V9:	%o0 = %tick, %o1 = %tick >> 32
!	V8:	%o0 = 0, %o1 = 0
!-----------------------------------------------------------------------
_sparcv9_rdtick:
	subcc	%g0,1,%o0
	.word	0x91408000	!rd	%ccr,%o0	(0x99 only on V9)
	cmp	%o0,0x99
	bne	.notick
	xor	%o0,%o0,%o0	! delay slot: %o0 = 0 on either path
	.word	0x91410000	!rd	%tick,%o0
	retl
	.word	0x93323020	!srlx	%o0,32,%o1
.notick:
	retl
	xor	%o1,%o1,%o1	! delay slot: %o1 = 0
.size	_sparcv9_rdtick,.-_sparcv9_rdtick
c06b0f3d | 241 | |
! _sparcv9_vis1_probe
!
! Executes two VIS1 instructions: a partial FP load from the stack
! (ldda with ASI_FP16_P at %sp+BIAS+2) followed by fxor, which leaves
! %f0 zeroed.  Clobbers %o1 and %f0.
! NOTE(review): presumably the caller detects missing VIS1 support by
! catching the resulting trap -- confirm against the caller.
.global	_sparcv9_vis1_probe
.align	8
_sparcv9_vis1_probe:
	add	%sp,BIAS+2,%o1
	.word	0xc19a5a40	!ldda	[%o1]ASI_FP16_P,%f0
	retl
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
.type	_sparcv9_vis1_probe,#function
.size	_sparcv9_vis1_probe,.-_sparcv9_vis1_probe
251 | ||
! _sparcv9_vis1_instrument
!
! Probes and times VIS1 instructions.  The result is the number of
! cycles it takes to execute rdtick plus a pair of VIS1 instructions.
! The US-Tx VIS unit is slow (documented to be 6 cycles on T2) and the
! core is in-order single-issue, so it should be possible to
! distinguish Tx reliably.  Observed return values are:
!
!	UltraSPARC IIe		7
!	UltraSPARC III		7
!	UltraSPARC T1		24
!	SPARC T4		65(*)
!
! (*)	This result has less to do with VIS instruction latencies:
!	rdtick simply appears to be that slow.  It still does the trick
!	in the sense that FP and VIS code paths are slower than
!	integer-only ones.
!
! Numbers for T2 and SPARC64 V-VII are more than welcome.
!
! It would be possible to detect US-T1 specifically by instrumenting
! fmul8ulx16, which is emulated on T1 and as such accounts for quite a
! lot of %tick-s, a couple of thousand on Linux.
!
! Out:	%o0 = smallest of the four measured %tick intervals
! Clobbers %o1-%o4, %f0, %f2 and the condition codes.
.global	_sparcv9_vis1_instrument
.align	8
_sparcv9_vis1_instrument:
	! warm-up pair, then five %tick samples each separated by one pair
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x91410000	!rd	%tick,%o0
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x93410000	!rd	%tick,%o1
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x95410000	!rd	%tick,%o2
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x97410000	!rd	%tick,%o3
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x99410000	!rd	%tick,%o4

	! turn the five samples into four intervals in %o0-%o3
	sub	%o1,%o0,%o0
	sub	%o2,%o1,%o1
	sub	%o3,%o2,%o2
	sub	%o4,%o3,%o3

	! %o0 = min(%o0,%o1,%o2,%o3); each annulled branch executes its
	! delay-slot mov only when %o0 is the larger (unsigned) value
	cmp	%o0,%o1
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o1,%o0
	cmp	%o0,%o2
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o2,%o0
	cmp	%o0,%o3
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o3,%o0

	retl
	nop
.type	_sparcv9_vis1_instrument,#function
.size	_sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument
312 | ||
! _sparcv9_vis2_probe
!
! Executes a single VIS2 instruction (bshuffle) in the delay slot of
! the return.  Writes %f0.
.global	_sparcv9_vis2_probe
.align	8
_sparcv9_vis2_probe:
	retl
	.word	0x81b00980	!bshuffle	%f0,%f0,%f0
.type	_sparcv9_vis2_probe,#function
.size	_sparcv9_vis2_probe,.-_sparcv9_vis2_probe
320 | ||
! _sparcv9_fmadd_probe
!
! Zeroes %f0 and %f2 with fxor, then executes one fused multiply-add
! (fmaddd) on them in the delay slot of the return.  Writes %f0, %f2.
.global	_sparcv9_fmadd_probe
.align	8
_sparcv9_fmadd_probe:
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	retl
	.word	0x81b80440	!fmaddd	%f0,%f0,%f2,%f0
.type	_sparcv9_fmadd_probe,#function
.size	_sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe
c32fcca6 | 330 | |
! _sparcv9_rdcfr
!
! Out:	%o0 = contents of %asr26, read in the delay slot of the return.
.global	_sparcv9_rdcfr
.align	8
_sparcv9_rdcfr:
	retl
	.word	0x91468000	!rd	%asr26,%o0
.type	_sparcv9_rdcfr,#function
.size	_sparcv9_rdcfr,.-_sparcv9_rdcfr
338 | ||
! _sparcv9_vis3_probe
!
! Executes a single VIS3 instruction (xmulx) in the delay slot of the
! return.  All operands are %g0, so no register is modified.
.global	_sparcv9_vis3_probe
.align	8
_sparcv9_vis3_probe:
	retl
	.word	0x81b022a0	!xmulx	%g0,%g0,%g0
.type	_sparcv9_vis3_probe,#function
.size	_sparcv9_vis3_probe,.-_sparcv9_vis3_probe
346 | ||
! _sparcv9_random
!
! Out:	%o0 = result of the `random` instruction, executed in the delay
!	slot of the return.
.global	_sparcv9_random
.align	8
_sparcv9_random:
	retl
	.word	0x91b002a0	!random	%o0
.type	_sparcv9_random,#function
! The size must be measured from this function's own label; it used to
! be measured from _sparcv9_vis3_probe, yielding a wrong symbol size.
.size	_sparcv9_random,.-_sparcv9_random
354 | ||
! _sparcv9_fjaesx_probe
!
! Executes a single faesencx instruction and returns.  Writes %f0.
.global	_sparcv9_fjaesx_probe
.align	8
_sparcv9_fjaesx_probe:
	.word	0x81b09206	!faesencx %f2,%f6,%f0
	retl
	nop
! Mark the symbol as a function, like every other probe in this file.
.type	_sparcv9_fjaesx_probe,#function
.size	_sparcv9_fjaesx_probe,.-_sparcv9_fjaesx_probe
362 | ||
!-----------------------------------------------------------------------
! OPENSSL_cleanse
!
! In:	%o0 = start of buffer
!	%o1 = length in bytes
! Fills the buffer with zero bytes.
!
! Lengths up to 14 are handled by the byte loop at .Little.  Longer
! buffers are first byte-stored up to an aligned address, then cleared
! with aligned stores (8 bytes on V9, 4 bytes on V8), and any remaining
! tail goes through .Little again.
! Clobbers %o0, %o1, %g1 (32-bit builds only) and the condition codes.
!-----------------------------------------------------------------------
.global	OPENSSL_cleanse
.align	32
OPENSSL_cleanse:
	cmp	%o1,14
	nop
#ifdef ABI64
	bgu	%xcc,.Lot
#else
	bgu	.Lot
#endif
	cmp	%o1,0		! delay slot: is there anything to do at all?
	bne	.Little
	nop
	retl
	nop

	! byte-at-a-time loop; entered only with %o1 != 0
.Little:
	stb	%g0,[%o0]
	subcc	%o1,1,%o1
	bnz	.Little
	add	%o0,1,%o0
	retl
	nop
.align	32
.Lot:
#ifndef ABI64
	! run-time V8/V9 probe: rd %ccr reads back 0x99 only on V9
	subcc	%g0,1,%g1
	.word	0x83408000	!rd	%ccr,%g1
	cmp	%g1,0x99
	bne	.v8lot
	nop
#endif

	! V9: byte stores until %o0 is 8-byte aligned
.v9lot:	andcc	%o0,7,%g0
	bz	.v9aligned
	nop
	stb	%g0,[%o0]
	sub	%o1,1,%o1
	ba	.v9lot
	add	%o0,1,%o0
.align	16,0x01000000
	! V9: 8 bytes per iteration while at least 8 remain.  The branch is
	! hand-encoded with a displacement of -3 instructions, so nothing
	! may be inserted between the label and the branch.
.v9aligned:
	.word	0xc0720000	!stx	%g0,[%o0]
	sub	%o1,8,%o1
	andcc	%o1,-8,%g0
#ifdef ABI64
	.word	0x126ffffd	!bnz	%xcc,.v9aligned
#else
	.word	0x124ffffd	!bnz	%icc,.v9aligned
#endif
	add	%o0,8,%o0

	cmp	%o1,0		! tail of fewer than 8 bytes?
	bne	.Little
	nop
	retl
	nop
#ifndef ABI64
	! V8: byte stores until %o0 is 4-byte aligned
.v8lot:	andcc	%o0,3,%g0
	bz	.v8aligned
	nop
	stb	%g0,[%o0]
	sub	%o1,1,%o1
	ba	.v8lot
	add	%o0,1,%o0
	nop
	! V8: 4 bytes per iteration while at least 4 remain
.v8aligned:
	st	%g0,[%o0]
	sub	%o1,4,%o1
	andcc	%o1,-4,%g0
	bnz	.v8aligned
	add	%o0,4,%o0

	cmp	%o1,0		! tail of fewer than 4 bytes?
	bne	.Little
	nop
	retl
	nop
#endif
.type	OPENSSL_cleanse,#function
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
445 | ||
!-----------------------------------------------------------------------
! CRYPTO_memcmp
!
! In:	%o0 = first buffer
!	%o1 = second buffer
!	%o2 = length in bytes
! Out:	%o0 = 0 if all bytes are equal (or the length is 0), 1 otherwise
!
! Every byte is always examined: the differences are OR-ed into an
! accumulator and the loop never exits early on a mismatch.
! Clobbers %o1-%o4, %g1 and the condition codes.
!-----------------------------------------------------------------------
.global	CRYPTO_memcmp
.align	16
CRYPTO_memcmp:
	cmp	%o2,0
#ifdef ABI64
	beq,pn	%xcc,.Lno_data
#else
	beq	.Lno_data
#endif
	xor	%g1,%g1,%g1	! delay slot: accumulator = 0
	nop

.Loop_cmp:
	ldub	[%o0],%o3
	add	%o0,1,%o0
	ldub	[%o1],%o4
	add	%o1,1,%o1
	subcc	%o2,1,%o2	! one byte fewer to go
	xor	%o3,%o4,%o4	! non-zero iff the two bytes differ
#ifdef ABI64
	bnz	%xcc,.Loop_cmp
#else
	bnz	.Loop_cmp
#endif
	or	%o4,%g1,%g1	! delay slot: accumulate the difference

	sub	%g0,%g1,%g1	! negate: bit 31 becomes set iff accumulator != 0
	srl	%g1,31,%g1	! reduce to 0 or 1
.Lno_data:
	retl
	mov	%g1,%o0
.type	CRYPTO_memcmp,#function
.size	CRYPTO_memcmp,.-CRYPTO_memcmp
479 | ||
!-----------------------------------------------------------------------
! _sparcv9_vis1_instrument_bus
!
! In:	%o0 = out, array of 32-bit words
!	%o1 = cnt, number of words to process
! Out:	%o0 = cnt
!
! For each word: block-load and block-store (with commit) the 64-byte
! line containing it, then use `cas` to add the %tick delta measured
! since the previous iteration to that word.
! Clobbers %o1, %o3-%o5, %g1, %g4, the block-loaded FP registers and
! the condition codes.
!-----------------------------------------------------------------------
.global	_sparcv9_vis1_instrument_bus
.align	8
_sparcv9_vis1_instrument_bus:
	mov	%o1,%o3					! save cnt
	.word	0x99410000	!rd	%tick,%o4	! tick
	mov	%o4,%o5					! lasttick = tick
	set	0,%g4					! diff

	! priming pass over out[0] with diff == 0
	andn	%o0,63,%g1				! 64-byte line holding *out
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4

.Loop:	.word	0x99410000	!rd	%tick,%o4
	sub	%o4,%o5,%g4				! diff=tick-lasttick
	mov	%o4,%o5					! lasttick=tick

	andn	%o0,63,%g1				! 64-byte line holding *out
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4				! *out + diff
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4
	subcc	%o1,1,%o1				! --$cnt
	bnz	.Loop
	add	%o0,4,%o0				! ++$out

	retl
	mov	%o3,%o0					! return the saved cnt
.type	_sparcv9_vis1_instrument_bus,#function
.size	_sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus
517 | ||
!-----------------------------------------------------------------------
! _sparcv9_vis1_instrument_bus2
!
! In:	%o0 = out, array of 32-bit words
!	%o1 = cnt, number of words available
!	%o2 = max, upper bound on loop iterations
! Out:	%o0 = cnt minus the number of words left unprocessed
!
! Same probe sequence as _sparcv9_vis1_instrument_bus, but `out` and
! `cnt` advance only when the measured %tick delta differs from the
! previous one.  The loop ends when cnt or max reaches zero.
! Clobbers %o1-%o5, %g1, %g4, %g5, the block-loaded FP registers and
! the condition codes.
!-----------------------------------------------------------------------
.global	_sparcv9_vis1_instrument_bus2
.align	8
_sparcv9_vis1_instrument_bus2:
	mov	%o1,%o3					! save cnt
	sll	%o1,2,%o1				! cnt*=4

	.word	0x99410000	!rd	%tick,%o4	! tick
	mov	%o4,%o5					! lasttick = tick
	set	0,%g4					! diff

	! priming pass over out[0] with diff == 0
	andn	%o0,63,%g1				! 64-byte line holding *out
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4

	.word	0x99410000	!rd	%tick,%o4	! tick
	sub	%o4,%o5,%g4				! diff=tick-lasttick
	mov	%o4,%o5					! lasttick=tick
	mov	%g4,%g5					! lastdiff=diff
.Loop2:
	andn	%o0,63,%g1				! 64-byte line holding *out
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4				! *out + diff
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4

	subcc	%o2,1,%o2				! --max
	bz	.Ldone2
	nop

	.word	0x99410000	!rd	%tick,%o4	! tick
	sub	%o4,%o5,%g4				! diff=tick-lasttick
	mov	%o4,%o5					! lasttick=tick
	cmp	%g4,%g5					! diff == lastdiff?
	mov	%g4,%g5					! lastdiff=diff

	! %g1 = 4 when diff != lastdiff, 0 otherwise
	.word	0x83408000	!rd	%ccr,%g1
	and	%g1,4,%g1				! isolate zero flag
	xor	%g1,4,%g1				! flip zero flag

	subcc	%o1,%g1,%o1				! conditional --$cnt
	bnz	.Loop2
	add	%o0,%g1,%o0				! conditional ++$out

.Ldone2:
	srl	%o1,2,%o1				! bytes left -> words left
	retl
	sub	%o3,%o1,%o0				! cnt - words left
.type	_sparcv9_vis1_instrument_bus2,#function
.size	_sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2
575 | ||
! Initialisation hook: the call below is emitted into the ".init"
! section so that OPENSSL_cpuid_setup is invoked from there.
.section	".init",#alloc,#execinstr
	call	OPENSSL_cpuid_setup
	nop			! delay slot