]>
Commit | Line | Data |
---|---|---|
44c8a5e2 RS |
1 | ! Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved. |
2 | ! | |
3 | ! Licensed under the OpenSSL license (the "License"). You may not use | |
4 | ! this file except in compliance with the License. You can obtain a copy | |
5 | ! in the file LICENSE in the source distribution or at | |
6 | ! https://www.openssl.org/source/license.html | |
7 | ||
ae8b47f0 AP |
8 | #ifdef OPENSSL_FIPSCANISTER |
9 | #include <openssl/fipssyms.h> | |
10 | #endif | |
11 | ||
cee73df3 AP |
12 | #if defined(__SUNPRO_C) && defined(__sparcv9) |
13 | # define ABI64 /* They've said -xarch=v9 at command line */ | |
14 | #elif defined(__GNUC__) && defined(__arch64__) | |
15 | # define ABI64 /* They've said -m64 at command line */ | |
16 | #endif | |
17 | ||
18 | #ifdef ABI64 | |
19 | .register %g2,#scratch | |
20 | .register %g3,#scratch | |
21 | # define FRAME -192 | |
22 | # define BIAS 2047 | |
23 | #else | |
24 | # define FRAME -96 | |
25 | # define BIAS 0 | |
26 | #endif | |
27 | ||
28 | .text | |
.align	32
.global	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,#function
!----------------------------------------------------------------------
! OPENSSL_wipe_cpu
!
! Zeroes the integer register file (outs, locals, ins, %g1-%g5) and
! the floating-point registers, then hands back a stack pointer.
!
! Keep in mind that this does not excuse us from wiping the stack!
! This routine wipes registers, but not the backing store [which
! resides on the stack, toward lower addresses]. To facilitate
! stack wiping the routine returns a pointer to the top of stack of
! the *caller*.
!
! Out:	%o0 (as seen by the caller) = %fp + stack bias of this frame
!----------------------------------------------------------------------
OPENSSL_wipe_cpu:
	save	%sp,FRAME,%sp		! fresh register window + frame
	nop
#ifdef __sun
#include <sys/trap.h>
	ta	ST_CLEAN_WINDOWS	! software trap named ST_CLEAN_WINDOWS
#else
	call	.walk.reg.wins		! scrub windows by hand instead
#endif
	nop				! delay slot (of the call, if any)

	! Position-independent way to obtain the address of .zero:
	! the delay slot puts the distance from the call instruction
	! to .zero into %o0, the helper adds %o7 (address of the call).
	call	.PIC.zero.up
	mov	.zero-(.-4),%o0
	ld	[%o0],%f0		! %f0 = 0
	ld	[%o0],%f1		! %f1 = 0

	subcc	%g0,1,%o0		! set "negative" and "borrow" flags
	! Following is V9 "rd %ccr,%o0" instruction. However! V8
	! specification says that it ("rd %asr2,%o0" in V8 terms) does
	! not cause illegal_instruction trap. It therefore can be used
	! to determine if the CPU the code is executing on is V8- or
	! V9-compliant, as V9 returns a distinct value of 0x99,
	! "negative" and "borrow" bits set in both %icc and %xcc.
	.word	0x91408000	!rd	%ccr,%o0
	cmp	%o0,0x99
	bne	.v8			! not V9: skip the upper FP bank
	nop

	! Even though we do not use %fp register bank,
	! we wipe it as memcpy might have used it...
	! Hand-encoded because a V8 assembler would reject these.
	.word	0xbfa00040	!fmovd	%f0,%f62
	.word	0xbba00040	!...
	.word	0xb7a00040
	.word	0xb3a00040
	.word	0xafa00040
	.word	0xaba00040
	.word	0xa7a00040
	.word	0xa3a00040
	.word	0x9fa00040
	.word	0x9ba00040
	.word	0x97a00040
	.word	0x93a00040
	.word	0x8fa00040
	.word	0x8ba00040
	.word	0x87a00040
	.word	0x83a00040	!fmovd	%f0,%f32

	! Lower FP bank and integer registers, interleaved.
	! %f0/%f1 already hold zero and serve as the copy sources.
.v8:	fmovs	%f1,%f31
	clr	%o0
	fmovs	%f0,%f30
	clr	%o1
	fmovs	%f1,%f29
	clr	%o2
	fmovs	%f0,%f28
	clr	%o3
	fmovs	%f1,%f27
	clr	%o4
	fmovs	%f0,%f26
	clr	%o5
	fmovs	%f1,%f25
	clr	%o7
	fmovs	%f0,%f24
	clr	%l0
	fmovs	%f1,%f23
	clr	%l1
	fmovs	%f0,%f22
	clr	%l2
	fmovs	%f1,%f21
	clr	%l3
	fmovs	%f0,%f20
	clr	%l4
	fmovs	%f1,%f19
	clr	%l5
	fmovs	%f0,%f18
	clr	%l6
	fmovs	%f1,%f17
	clr	%l7
	fmovs	%f0,%f16
	clr	%i0
	fmovs	%f1,%f15
	clr	%i1
	fmovs	%f0,%f14
	clr	%i2
	fmovs	%f1,%f13
	clr	%i3
	fmovs	%f0,%f12
	clr	%i4
	fmovs	%f1,%f11
	clr	%i5
	fmovs	%f0,%f10
	clr	%g1
	fmovs	%f1,%f9
	clr	%g2
	fmovs	%f0,%f8
	clr	%g3
	fmovs	%f1,%f7
	clr	%g4
	fmovs	%f0,%f6
	clr	%g5
	fmovs	%f1,%f5
	fmovs	%f0,%f4
	fmovs	%f1,%f3
	fmovs	%f0,%f2

	add	%fp,BIAS,%i0	! return pointer to top of the caller stack

	ret
	restore

! Two zero words used as the source for clearing %f0/%f1.
.zero:	.long	0x0,0x0

! Leaf helper: %o0 = %o0 + %o7, i.e. turns the call-relative offset
! passed in %o0 into an absolute address.
.PIC.zero.up:
	retl
	add	%o0,%o7,%o0

#ifdef DEBUG
.global	walk_reg_wins
.type	walk_reg_wins,#function
walk_reg_wins:
#endif
! Recursive helper: opens a new register window on every level and
! clears its outs and locals on the way back. %i0 is incremented per
! level on return, which is used for debugging only.
.walk.reg.wins:
	save	%sp,FRAME,%sp
	cmp	%i7,%o7		! leftover %o7 equal to our return address?
	be	2f		! yes: stop recursing, return right away
	clr	%o0		! (delay slot)
	cmp	%o7,0		! compiler never cleans %o7...
	be	1f		! could have been a leaf function...
	clr	%o1		! (delay slot)
	call	.walk.reg.wins	! go one window deeper
	nop
1:	clr	%o2
	clr	%o3
	clr	%o4
	clr	%o5
	clr	%o7
	clr	%l0
	clr	%l1
	clr	%l2
	clr	%l3
	clr	%l4
	clr	%l5
	clr	%l6
	clr	%l7
	add	%o0,1,%i0	! used for debugging
2:	ret
	restore
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
178 | ||
.global	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,#function
.align	32
!----------------------------------------------------------------------
! OPENSSL_atomic_add
!
! In:	%o0 = address of a 32-bit counter
!	%o1 = amount to add
! Out:	%o0 = new counter value, sign-extended from 32 bits
!
! In a 32-bit build the CPU generation is probed at run time: V9
! parts use compare-and-swap, everything else falls back to a
! swap-based spin lock on the counter word itself.
!----------------------------------------------------------------------
OPENSSL_atomic_add:
#ifndef ABI64
	subcc	%g0,1,%o2
	.word	0x95408000	!rd	%ccr,%o2, see comment above
	cmp	%o2,0x99
	be	.v9			! 0x99 read back: V9, use cas
	nop

	! ---- pre-V9 path: needs a real frame because it calls out ----
	save	%sp,FRAME,%sp
	ba	.enter
	nop
#ifdef __sun
! Note that you do not have to link with libthread to call thr_yield,
! as libc provides a stub, which is overloaded the moment you link
! with *either* libpthread or libthread...
#define	YIELD_CPU	thr_yield
#else
! applies at least to Linux and FreeBSD... Feedback expected...
#define	YIELD_CPU	sched_yield
#endif
.spin:	call	YIELD_CPU		! give up the CPU, then retry
	nop
.enter:	ld	[%i0],%i2
	! NOTE(review): this pre-check compares against -4096 while the
	! marker swapped in below is -1; confirm which one is intended.
	cmp	%i2,-4096
	be	.spin
	mov	-1,%i2			! (delay slot) marker value
	swap	[%i0],%i2		! counter <- -1, %i2 <- old value
	cmp	%i2,-1
	be	.spin			! somebody else holds the marker
	add	%i2,%i1,%i2		! (delay slot) new = old + amount
	stbar
	st	%i2,[%i0]		! publish the new value
	sra	%i2,%g0,%i0		! sign-extend the return value
	ret
	restore
.v9:
#endif
	! ---- V9 path: classic compare-and-swap retry loop ----
	ld	[%o0],%o2		! expected = *counter
1:	add	%o1,%o2,%o3		! desired = expected + amount
	.word	0xd7e2100a	!cas	[%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3
	cmp	%o2,%o3
	bne	1b			! memory changed underneath: retry
	mov	%o3,%o2	! cas is always fetching to dest. register
	add	%o1,%o2,%o0	! OpenSSL expects the new value
	retl
	sra	%o0,%g0,%o0	! we return signed int, remember?
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add
228 | ||
.global	_sparcv9_rdtick
.align	32
!----------------------------------------------------------------------
! _sparcv9_rdtick
!
! Out:	%o0 = %tick value, %o1 = %o0 shifted right by 32 (V9 CPU)
!	%o0 = 0, %o1 = 0 when the V9 probe fails
!----------------------------------------------------------------------
_sparcv9_rdtick:
	subcc	%g0,1,%o0		! prime the condition codes
	.word	0x91408000	!rd	%ccr,%o0
	cmp	%o0,0x99		! 0x99 is what a V9 CPU reads back
	bne	.notick
	xor	%o0,%o0,%o0		! (delay slot) %o0 = 0 on both paths
	.word	0x91410000	!rd	%tick,%o0
	retl
	.word	0x93323020	!srlx	%o0,32,%o1
.notick:
	retl
	xor	%o1,%o1,%o1		! (delay slot) %o1 = 0
.type	_sparcv9_rdtick,#function
.size	_sparcv9_rdtick,.-_sparcv9_rdtick
c06b0f3d | 245 | |
4b2603e4 AP |
.global	_sparcv9_vis1_probe
.align	8
! _sparcv9_vis1_probe: executes one VIS1 load and one VIS1 logical
! operation. Presumably the caller arranges to catch the trap raised
! on CPUs without VIS1 - that handling is not part of this file.
! Clobbers %o1 and %f0.
_sparcv9_vis1_probe:
	add	%sp,BIAS+2,%o1		! address inside the current frame
	.word	0xc19a5a40	!ldda	[%o1]ASI_FP16_P,%f0
	retl
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
.type	_sparcv9_vis1_probe,#function
.size	_sparcv9_vis1_probe,.-_sparcv9_vis1_probe
255 | ||
7c5889bf AP |
! Probe and instrument VIS1 instruction. Output is number of cycles it
! takes to execute rdtick and pair of VIS1 instructions. US-Tx VIS unit
! is slow (documented to be 6 cycles on T2) and the core is in-order
! single-issue, it should be possible to distinguish Tx reliably...
! Observed return values are:
!
!	UltraSPARC IIe		7
!	UltraSPARC III		7
!	UltraSPARC T1		24
!	SPARC T4		65(*)
!
! (*)	result has lesser to do with VIS instruction latencies, rdtick
!	appears that slow, but it does the trick in sense that FP and
!	VIS code paths are still slower than integer-only ones.
!
! Numbers for T2 and SPARC64 V-VII are more than welcome.
!
! It would be possible to detect specifically US-T1 by instrumenting
! fmul8ulx16, which is emulated on T1 and as such accounts for quite
! a lot of %tick-s, couple of thousand on Linux...
!
! Out:	%o0 = smallest of the four measured %tick intervals
! Clobbers %o1-%o4, %f0, %f2 and the condition codes.
.global	_sparcv9_vis1_instrument
.align	8
_sparcv9_vis1_instrument:
	! one fxor pair before the first sample, not timed
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2

	! five %tick samples, each separated by one fxor pair
	.word	0x91410000	!rd	%tick,%o0
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x93410000	!rd	%tick,%o1
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x95410000	!rd	%tick,%o2
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x97410000	!rd	%tick,%o3
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x99410000	!rd	%tick,%o4

	! calculate intervals
	sub	%o1,%o0,%o0
	sub	%o2,%o1,%o1
	sub	%o3,%o2,%o2
	sub	%o4,%o3,%o3

	! find minimum value: the annulled branch executes its delay
	! slot only when taken, i.e. only when %o0 is the larger one
	cmp	%o0,%o1
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o1,%o0
	cmp	%o0,%o2
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o2,%o0
	cmp	%o0,%o3
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o3,%o0

	retl
	nop
.type	_sparcv9_vis1_instrument,#function
.size	_sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument
316 | ||
.global	_sparcv9_vis2_probe
.align	8
! _sparcv9_vis2_probe: executes a single VIS2 instruction (bshuffle)
! in the delay slot of the return. Clobbers %f0.
_sparcv9_vis2_probe:
	retl
	.word	0x81b00980	!bshuffle	%f0,%f0,%f0
.type	_sparcv9_vis2_probe,#function
.size	_sparcv9_vis2_probe,.-_sparcv9_vis2_probe
324 | ||
.global	_sparcv9_fmadd_probe
.align	8
! _sparcv9_fmadd_probe: clears %f0 and %f2 with fxor, then executes
! one fused multiply-add in the delay slot of the return.
! Clobbers %f0 and %f2.
_sparcv9_fmadd_probe:
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	retl
	.word	0x81b80440	!fmaddd	%f0,%f0,%f2,%f0
.type	_sparcv9_fmadd_probe,#function
.size	_sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe
c32fcca6 | 334 | |
1fda639a AP |
.global	_sparcv9_rdcfr
.align	8
! _sparcv9_rdcfr: reads ancillary state register 26 into %o0 in the
! delay slot of the return.
! Out:	%o0 = value of %asr26
_sparcv9_rdcfr:
	retl
	.word	0x91468000	!rd	%asr26,%o0
.type	_sparcv9_rdcfr,#function
.size	_sparcv9_rdcfr,.-_sparcv9_rdcfr
342 | ||
.global	_sparcv9_vis3_probe
.align	8
! _sparcv9_vis3_probe: executes a single VIS3 instruction (xmulx) in
! the delay slot of the return. All operands are %g0, so no register
! is modified.
_sparcv9_vis3_probe:
	retl
	.word	0x81b022a0	!xmulx	%g0,%g0,%g0
.type	_sparcv9_vis3_probe,#function
.size	_sparcv9_vis3_probe,.-_sparcv9_vis3_probe
350 | ||
.global	_sparcv9_random
.align	8
! _sparcv9_random: executes the hand-encoded "random" instruction in
! the delay slot of the return.
! Out:	%o0 = value produced by the instruction
_sparcv9_random:
	retl
	.word	0x91b002a0	!random	%o0
.type	_sparcv9_random,#function
! Size is measured from this function's own label; measuring from the
! label of the preceding function would overstate it.
.size	_sparcv9_random,.-_sparcv9_random
358 | ||
4400f6c6 AP |
.global	_sparcv9_fjaesx_probe
.align	8
! _sparcv9_fjaesx_probe: executes a single hand-encoded faesencx
! instruction and returns. Clobbers %f0.
_sparcv9_fjaesx_probe:
	.word	0x81b09206	!faesencx	%f2,%f6,%f0
	retl
	nop
! Mark the symbol as a function, as every other probe in this file does.
.type	_sparcv9_fjaesx_probe,#function
.size	_sparcv9_fjaesx_probe,.-_sparcv9_fjaesx_probe
366 | ||
b2dba9bf AP |
.global	OPENSSL_cleanse
.align	32
!----------------------------------------------------------------------
! OPENSSL_cleanse
!
! In:	%o0 = start of buffer
!	%o1 = length in bytes
!
! Stores zeros over the buffer. Up to 14 bytes are cleared one byte
! at a time. Longer buffers are first byte-stepped to an aligned
! address, then cleared in 8-byte (V9) or 4-byte (pre-V9) stores,
! with the tail again handled byte by byte.
! Clobbers %o0, %o1, %g1 and the condition codes.
!----------------------------------------------------------------------
OPENSSL_cleanse:
	cmp	%o1,14
	nop
#ifdef ABI64
	bgu	%xcc,.Lot		! more than 14 bytes: bulk path
#else
	bgu	.Lot			! more than 14 bytes: bulk path
#endif
	cmp	%o1,0			! (delay slot)
	bne	.Little
	nop
	retl				! zero length: nothing to do
	nop

	! ---- byte-at-a-time loop, also used for the tail ----
.Little:
	stb	%g0,[%o0]
	subcc	%o1,1,%o1
	bnz	.Little
	add	%o0,1,%o0		! (delay slot) advance pointer
	retl
	nop
.align	32
.Lot:
#ifndef ABI64
	subcc	%g0,1,%g1
	! rd %ccr reads back 0x99 only on a V9 CPU
	.word	0x83408000	!rd	%ccr,%g1
	cmp	%g1,0x99
	bne	.v8lot
	nop
#endif

	! ---- V9: step byte-wise up to an 8-byte boundary ----
.v9lot:	andcc	%o0,7,%g0
	bz	.v9aligned
	nop
	stb	%g0,[%o0]
	sub	%o1,1,%o1
	ba	.v9lot
	add	%o0,1,%o0		! (delay slot)
.align	16,0x01000000
	! ---- V9: 8 bytes per iteration while 8 or more remain ----
	! The branch below is hand-encoded with a fixed displacement,
	! so the instruction count of this loop must not change.
.v9aligned:
	.word	0xc0720000	!stx	%g0,[%o0]
	sub	%o1,8,%o1
	andcc	%o1,-8,%g0
#ifdef ABI64
	.word	0x126ffffd	!bnz	%xcc,.v9aligned
#else
	.word	0x124ffffd	!bnz	%icc,.v9aligned
#endif
	add	%o0,8,%o0		! (delay slot)

	cmp	%o1,0
	bne	.Little			! leftover bytes
	nop
	retl
	nop
#ifndef ABI64
	! ---- pre-V9: step byte-wise up to a 4-byte boundary ----
.v8lot:	andcc	%o0,3,%g0
	bz	.v8aligned
	nop
	stb	%g0,[%o0]
	sub	%o1,1,%o1
	ba	.v8lot
	add	%o0,1,%o0		! (delay slot)
	nop
	! ---- pre-V9: 4 bytes per iteration while 4 or more remain ----
.v8aligned:
	st	%g0,[%o0]
	sub	%o1,4,%o1
	andcc	%o1,-4,%g0
	bnz	.v8aligned
	add	%o0,4,%o0		! (delay slot)

	cmp	%o1,0
	bne	.Little			! leftover bytes
	nop
	retl
	nop
#endif
.type	OPENSSL_cleanse,#function
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
449 | ||
e33826f0 AP |
.global	CRYPTO_memcmp
.align	16
!----------------------------------------------------------------------
! CRYPTO_memcmp
!
! In:	%o0 = first buffer
!	%o1 = second buffer
!	%o2 = length in bytes
! Out:	%o0 = 0 if all bytes match or the length is zero, 1 otherwise
!
! Every byte is always visited: byte differences are OR-ed into an
! accumulator and the loop has no data-dependent early exit.
! Clobbers %o1-%o4, %g1 and the condition codes.
!----------------------------------------------------------------------
CRYPTO_memcmp:
	cmp	%o2,0
#ifdef ABI64
	beq,pn	%xcc,.Lno_data
#else
	beq	.Lno_data
#endif
	xor	%g1,%g1,%g1		! (delay slot) accumulator = 0
	nop

.Loop_cmp:
	ldub	[%o0],%o3
	add	%o0,1,%o0
	ldub	[%o1],%o4
	add	%o1,1,%o1
	subcc	%o2,1,%o2		! --len, sets flags for the branch
	xor	%o3,%o4,%o4		! non-zero where the bytes differ
#ifdef ABI64
	bnz	%xcc,.Loop_cmp
#else
	bnz	.Loop_cmp
#endif
	or	%o4,%g1,%g1		! (delay slot) accumulate

	! accumulator is in 0..255: negate it and take bit 31,
	! which yields 1 for any non-zero value and 0 for zero
	sub	%g0,%g1,%g1
	srl	%g1,31,%g1
.Lno_data:
	retl
	mov	%g1,%o0
.type	CRYPTO_memcmp,#function
.size	CRYPTO_memcmp,.-CRYPTO_memcmp
483 | ||
5fabb88a AP |
.global	_sparcv9_vis1_instrument_bus
.align	8
!----------------------------------------------------------------------
! _sparcv9_vis1_instrument_bus
!
! In:	%o0 = output array of 32-bit words
!	%o1 = cnt, number of words to fill
! Out:	%o0 = cnt
!
! For every output word: sample %tick, take the difference to the
! previous sample, do a block load / block store of the enclosing
! 64-byte line, then add the difference into the word via cas.
! Clobbers %o1, %o3-%o5, %g1, %g4 and the FP registers that the
! block load targets.
!----------------------------------------------------------------------
_sparcv9_vis1_instrument_bus:
	mov	%o1,%o3					! save cnt
	.word	0x99410000	!rd	%tick,%o4	! tick
	mov	%o4,%o5					! lasttick = tick
	set	0,%g4					! diff

	! warm-up round on the first word with diff = 0
	andn	%o0,63,%g1		! round address down to 64 bytes
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4

.Loop:	.word	0x99410000	!rd	%tick,%o4
	sub	%o4,%o5,%g4				! diff=tick-lasttick
	mov	%o4,%o5					! lasttick=tick

	andn	%o0,63,%g1
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4
	subcc	%o1,1,%o1				! --$cnt
	bnz	.Loop
	add	%o0,4,%o0				! ++$out

	retl
	mov	%o3,%o0			! (delay slot) return saved cnt
.type	_sparcv9_vis1_instrument_bus,#function
.size	_sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus
521 | ||
.global	_sparcv9_vis1_instrument_bus2
.align	8
!----------------------------------------------------------------------
! _sparcv9_vis1_instrument_bus2
!
! In:	%o0 = output array of 32-bit words
!	%o1 = cnt, number of words to fill
!	%o2 = max, upper bound on loop iterations
! Out:	%o0 = cnt minus the number of words left unfilled
!
! Same probing sequence as _sparcv9_vis1_instrument_bus, but the
! output pointer only advances when the current tick difference is
! not equal to the previous one. The advance is computed from the
! zero flag without a conditional branch.
! Clobbers %o1-%o5, %g1, %g4, %g5 and the FP registers that the block
! load targets.
!----------------------------------------------------------------------
_sparcv9_vis1_instrument_bus2:
	mov	%o1,%o3					! save cnt
	sll	%o1,2,%o1				! cnt*=4

	.word	0x99410000	!rd	%tick,%o4	! tick
	mov	%o4,%o5					! lasttick = tick
	set	0,%g4					! diff

	! warm-up round on the first word with diff = 0
	andn	%o0,63,%g1		! round address down to 64 bytes
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4

	.word	0x99410000	!rd	%tick,%o4	! tick
	sub	%o4,%o5,%g4				! diff=tick-lasttick
	mov	%o4,%o5					! lasttick=tick
	mov	%g4,%g5					! lastdiff=diff
.Loop2:
	andn	%o0,63,%g1
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4

	subcc	%o2,1,%o2				! --max
	bz	.Ldone2
	nop

	.word	0x99410000	!rd	%tick,%o4	! tick
	sub	%o4,%o5,%g4				! diff=tick-lasttick
	mov	%o4,%o5					! lasttick=tick
	cmp	%g4,%g5			! zero flag set when diff==lastdiff
	mov	%g4,%g5					! lastdiff=diff

	! %g1 becomes 4 when diff changed and 0 when it did not
	.word	0x83408000	!rd	%ccr,%g1
	and	%g1,4,%g1				! isolate zero flag
	xor	%g1,4,%g1				! flip zero flag

	subcc	%o1,%g1,%o1				! conditional --$cnt
	bnz	.Loop2
	add	%o0,%g1,%o0				! conditional ++$out

.Ldone2:
	srl	%o1,2,%o1		! remaining bytes back to words
	retl
	sub	%o3,%o1,%o0		! (delay slot) cnt - remaining
.type	_sparcv9_vis1_instrument_bus2,#function
.size	_sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2
579 | ||
c06b0f3d AP |
! Contribute a call to OPENSSL_cpuid_setup to the ".init" section.
! Code in that section is conventionally run by the runtime when the
! object is loaded - confirm against the target platform.
.section	".init",#alloc,#execinstr
	call	OPENSSL_cpuid_setup
	nop				! delay slot