]>
Commit | Line | Data |
---|---|---|
cee73df3 AP |
1 | #if defined(__SUNPRO_C) && defined(__sparcv9) |
2 | # define ABI64 /* 64-bit ABI: -xarch=v9 was given on the command line */ | |
3 | #elif defined(__GNUC__) && defined(__arch64__) | |
4 | # define ABI64 /* 64-bit ABI: -m64 was given on the command line */ | |
5 | #endif | |
6 | ||
7 | #ifdef ABI64 | |
8 | .register %g2,#scratch ! %g2 is used as scratch (it is cleared in OPENSSL_wipe_cpu) | |
9 | .register %g3,#scratch ! same for %g3 | |
10 | # define FRAME -192 /* displacement used by every save %sp,FRAME,%sp in this file */ | |
11 | # define BIAS 2047 /* added to %sp and %fp to form addresses; NOTE(review): presumably the 64-bit ABI stack bias, confirm */ | |
12 | #else | |
13 | # define FRAME -96 /* save displacement for the 32-bit build */ | |
14 | # define BIAS 0 /* %sp and %fp are used as addresses unchanged */ | |
15 | #endif | |
16 | ||
17 | .text | |
18 | .align 32 | |
19 | .global OPENSSL_wipe_cpu | |
20 | .type OPENSSL_wipe_cpu,#function | |
21 | ! Keep in mind that this does not excuse us from wiping the stack! | |
22 | ! This routine wipes registers, but not the backing store [which | |
23 | ! resides on the stack, toward lower addresses]. To facilitate stack | |
24 | ! wiping, it returns a pointer to the top of stack of the *caller*. | |
25 | OPENSSL_wipe_cpu: | |
26 | save %sp,FRAME,%sp ! open a new register window | |
27 | nop | |
28 | #ifdef __sun | |
29 | #include <sys/trap.h> | |
30 | ta ST_CLEAN_WINDOWS ! trap number comes from sys/trap.h; judging by its name it has the kernel clean the register windows | |
31 | #else | |
32 | call .walk.reg.wins ! no such trap assumed here: clear the windows by recursion instead | |
33 | #endif | |
34 | nop ! fills the delay slot when the call variant is assembled | |
35 | call .PIC.zero.up ! position-independent address of .zero: the helper adds %o7 to %o0 | |
36 | mov .zero-(.-4),%o0 ! delay slot: %o0 = distance from the call instruction to .zero | |
c06b0f3d AP |
37 | ld [%o0],%f0 ! %f0 = 0, read from .zero |
38 | ld [%o0],%f1 ! %f1 = 0, same word | |
cee73df3 AP |
39 | |
40 | subcc %g0,1,%o0 ! 0-1 sets the condition codes that the probe below reads back | |
41 | ! Following is V9 "rd %ccr,%o0" instruction. However! V8 | |
42 | ! specification says that it ("rd %asr2,%o0" in V8 terms) does | |
43 | ! not cause illegal_instruction trap. It therefore can be used | |
44 | ! to determine if the CPU the code is executing on is V8- or | |
45 | ! V9-compliant, as V9 returns a distinct value of 0x99, | |
46 | ! "negative" and "borrow" bits set in both %icc and %xcc. | |
47 | .word 0x91408000 !rd %ccr,%o0 | |
48 | cmp %o0,0x99 | |
49 | bne .v8 ! not V9: skip the moves to %f32..%f62 | |
50 | nop ! delay slot | |
51 | ! Even though we do not use %fp register bank, | |
52 | ! we wipe it as memcpy might have used it... | |
53 | .word 0xbfa00040 !fmovd %f0,%f62 | |
54 | .word 0xbba00040 !... | |
55 | .word 0xb7a00040 | |
56 | .word 0xb3a00040 | |
57 | .word 0xafa00040 | |
58 | .word 0xaba00040 | |
59 | .word 0xa7a00040 | |
60 | .word 0xa3a00040 | |
61 | .word 0x9fa00040 | |
62 | .word 0x9ba00040 | |
63 | .word 0x97a00040 | |
64 | .word 0x93a00040 | |
65 | .word 0x8fa00040 | |
66 | .word 0x8ba00040 | |
67 | .word 0x87a00040 | |
68 | .word 0x83a00040 !fmovd %f0,%f32 | |
69 | .v8: fmovs %f1,%f31 ! from here on: copy the zero in %f0/%f1 into %f31..%f2, interleaved with clearing integer registers | |
70 | clr %o0 | |
71 | fmovs %f0,%f30 | |
72 | clr %o1 | |
73 | fmovs %f1,%f29 | |
74 | clr %o2 | |
75 | fmovs %f0,%f28 | |
76 | clr %o3 | |
77 | fmovs %f1,%f27 | |
78 | clr %o4 | |
79 | fmovs %f0,%f26 | |
80 | clr %o5 | |
81 | fmovs %f1,%f25 | |
82 | clr %o7 | |
83 | fmovs %f0,%f24 | |
84 | clr %l0 | |
85 | fmovs %f1,%f23 | |
86 | clr %l1 | |
87 | fmovs %f0,%f22 | |
88 | clr %l2 | |
89 | fmovs %f1,%f21 | |
90 | clr %l3 | |
91 | fmovs %f0,%f20 | |
92 | clr %l4 | |
93 | fmovs %f1,%f19 | |
94 | clr %l5 | |
95 | fmovs %f0,%f18 | |
96 | clr %l6 | |
97 | fmovs %f1,%f17 | |
98 | clr %l7 | |
99 | fmovs %f0,%f16 | |
100 | clr %i0 | |
101 | fmovs %f1,%f15 | |
102 | clr %i1 | |
103 | fmovs %f0,%f14 | |
104 | clr %i2 | |
105 | fmovs %f1,%f13 | |
106 | clr %i3 | |
107 | fmovs %f0,%f12 | |
108 | clr %i4 | |
109 | fmovs %f1,%f11 | |
110 | clr %i5 | |
111 | fmovs %f0,%f10 | |
112 | clr %g1 | |
113 | fmovs %f1,%f9 | |
114 | clr %g2 | |
115 | fmovs %f0,%f8 | |
116 | clr %g3 | |
117 | fmovs %f1,%f7 | |
118 | clr %g4 | |
119 | fmovs %f0,%f6 | |
120 | clr %g5 | |
121 | fmovs %f1,%f5 | |
122 | fmovs %f0,%f4 | |
123 | fmovs %f1,%f3 | |
124 | fmovs %f0,%f2 | |
125 | ||
a027bba2 | 126 | add %fp,BIAS,%i0 ! return pointer to top of stack of the caller |
cee73df3 AP |
127 | |
128 | ret | |
129 | restore ! delay slot: drop the window, %i0 becomes %o0 of the caller | |
130 | ||
131 | .zero: .long 0x0,0x0 ! zero words read by the two ld instructions above | |
132 | .PIC.zero.up: | |
133 | retl | |
134 | add %o0,%o7,%o0 ! delay slot: %o0 += %o7, the address of the calling instruction | |
135 | #ifdef DEBUG | |
136 | .global walk_reg_wins ! exported alias of the helper, only in debug builds | |
137 | .type walk_reg_wins,#function | |
138 | walk_reg_wins: | |
139 | #endif | |
140 | .walk.reg.wins: ! recursive helper: each level opens a window and clears its %o2-%o5, %o7 and %l0-%l7 | |
141 | save %sp,FRAME,%sp | |
142 | cmp %i7,%o7 ! compare our return address with the %o7 found in the new window | |
143 | be 2f ! equal: return right away without clearing | |
144 | clr %o0 ! delay slot: %o0 = 0 | |
145 | cmp %o7,0 ! compiler never cleans %o7... | |
146 | be 1f ! could have been a leaf function... | |
147 | clr %o1 ! delay slot | |
148 | call .walk.reg.wins ! otherwise go one window deeper first | |
149 | nop ! delay slot | |
150 | 1: clr %o2 | |
151 | clr %o3 | |
152 | clr %o4 | |
153 | clr %o5 | |
154 | clr %o7 | |
155 | clr %l0 | |
156 | clr %l1 | |
157 | clr %l2 | |
158 | clr %l3 | |
159 | clr %l4 | |
160 | clr %l5 | |
161 | clr %l6 | |
162 | clr %l7 | |
163 | add %o0,1,%i0 ! used for debugging | |
164 | 2: ret | |
165 | restore ! delay slot: drop the window | |
166 | .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu ! the size also spans .zero, .PIC.zero.up and .walk.reg.wins | |
167 | ||
168 | .global OPENSSL_atomic_add | |
169 | .type OPENSSL_atomic_add,#function | |
c06b0f3d | 170 | .align 32 |
cee73df3 AP |
171 | OPENSSL_atomic_add: ! in: %o0 = address of a 32-bit counter, %o1 = amount to add; out: %o0 = new value |
172 | #ifndef ABI64 | |
173 | subcc %g0,1,%o2 ! prepare the V8 or V9 probe | |
174 | .word 0x95408000 !rd %ccr,%o2, see comment above | |
175 | cmp %o2,0x99 | |
176 | be .v9 ! V9 CPU: use the cas loop | |
177 | nop ! delay slot | |
178 | save %sp,FRAME,%sp ! V8 path calls out, so it takes a window; arguments are now %i0 and %i1 | |
179 | ba .enter | |
180 | nop ! delay slot | |
181 | #ifdef __sun | |
20dc93e4 | 182 | ! Note that you do not have to link with libthread to call thr_yield, |
cee73df3 AP |
183 | ! as libc provides a stub, which is overloaded the moment you link |
184 | ! with *either* libpthread or libthread... | |
185 | #define YIELD_CPU thr_yield | |
186 | #else | |
187 | ! applies at least to Linux and FreeBSD... Feedback expected... | |
188 | #define YIELD_CPU sched_yield | |
189 | #endif | |
190 | .spin: call YIELD_CPU ! counter looks busy: give up the processor and retry | |
191 | nop ! delay slot | |
192 | .enter: ld [%i0],%i2 | |
193 | cmp %i2,-4096 ! NOTE(review): this pre-check tests -4096 while the busy marker swapped in below is -1, confirm which is intended | |
194 | be .spin | |
195 | mov -1,%i2 ! delay slot: -1 is the busy marker | |
196 | swap [%i0],%i2 ! fetch the counter and leave the marker in memory | |
197 | cmp %i2,-1 ! fetched the marker: somebody else is in the middle of an update | |
198 | be .spin | |
199 | add %i2,%i1,%i2 ! delay slot: new value = old value + amount | |
200 | stbar | |
201 | st %i2,[%i0] ! store the new value, which also replaces the marker | |
202 | sra %i2,%g0,%i0 ! result goes to %i0 | |
203 | ret | |
204 | restore ! delay slot: drop the window, %i0 becomes %o0 of the caller | |
205 | .v9: | |
206 | #endif | |
207 | ld [%o0],%o2 ! %o2 = value currently expected in memory | |
208 | 1: add %o1,%o2,%o3 ! %o3 = candidate new value | |
209 | .word 0xd7e2100a !cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3 | |
210 | cmp %o2,%o3 ! differ: memory had changed, nothing was stored | |
211 | bne 1b | |
212 | mov %o3,%o2 ! cas is always fetching to dest. register | |
213 | add %o1,%o2,%o0 ! OpenSSL expects the new value | |
214 | retl | |
215 | sra %o0,%g0,%o0 ! we return signed int, remember? | |
216 | .size OPENSSL_atomic_add,.-OPENSSL_atomic_add | |
217 | ||
a00e414f | 218 | .global _sparcv9_rdtick |
c06b0f3d | 219 | .align 32 |
a00e414f | 220 | _sparcv9_rdtick: ! out: %o0 = %tick and %o1 = %tick shifted right by 32 on V9; both zero otherwise |
cee73df3 AP |
221 | subcc %g0,1,%o0 ! prepare the V8 or V9 probe |
222 | .word 0x91408000 !rd %ccr,%o0 | |
223 | cmp %o0,0x99 | |
c06b0f3d | 224 | bne .notick |
cee73df3 | 225 | xor %o0,%o0,%o0 ! delay slot, runs on both paths: %o0 = 0 |
c06b0f3d AP |
226 | .word 0x91410000 !rd %tick,%o0 |
227 | retl | |
40b6d493 | 228 | .word 0x93323020 !srlx %o0,32,%o1 |
c06b0f3d | 229 | .notick: |
cee73df3 | 230 | retl |
c06b0f3d | 231 | xor %o1,%o1,%o1 ! delay slot: %o1 = 0 |
a00e414f AP |
232 | .type _sparcv9_rdtick,#function |
233 | .size _sparcv9_rdtick,.-_sparcv9_rdtick | |
c06b0f3d | 234 | |
387ed39f AP |
235 | .global _sparcv9_vis1_probe |
236 | .align 8 | |
237 | _sparcv9_vis1_probe: ! executes two hand-encoded VIS1 instructions; NOTE(review): presumably the caller catches the trap raised when they are unsupported, confirm | |
387ed39f | 238 | add %sp,BIAS+2,%o1 ! %o1 = address inside the current stack frame, offset by 2 |
387ed39f | 239 | .word 0xc19a5a40 !ldda [%o1]ASI_FP16_P,%f0 |
1113fc31 AP |
240 | retl |
241 | .word 0x81b00d80 !fxor %f0,%f0,%f0 | |
387ed39f AP |
242 | .type _sparcv9_vis1_probe,#function |
243 | .size _sparcv9_vis1_probe,.-_sparcv9_vis1_probe | |
244 | ||
f90bf722 AP |
245 | ! Probe and instrument VIS1 instruction. Output is number of cycles it |
246 | ! takes to execute rdtick and pair of VIS1 instructions. As US-Tx VIS unit | |
247 | ! is slow (documented to be 6 cycles on T2) and the core is in-order | |
248 | ! single-issue, it should be possible to distinguish Tx reliably... | |
249 | ! Observed return values are: | |
250 | ! | |
387ed39f | 251 | ! UltraSPARC IIe 7 |
f90bf722 AP |
252 | ! UltraSPARC III 7 |
253 | ! UltraSPARC T1 24 | |
254 | ! | |
255 | ! Numbers for T2 and SPARC64 V-VII are more than welcome. | |
256 | ! | |
257 | ! It would be possible to detect specifically US-T1 by instrumenting | |
258 | ! fmul8ulx16, which is emulated on T1 and as such accounts for quite | |
259 | ! a lot of %tick-s, couple of thousand on Linux... | |
387ed39f | 260 | .global _sparcv9_vis1_instrument |
40b6d493 | 261 | .align 8 |
387ed39f | 262 | _sparcv9_vis1_instrument: |
f90bf722 AP |
263 | .word 0x91410000 !rd %tick,%o0 |
264 | .word 0x81b00d80 !fxor %f0,%f0,%f0 | |
265 | .word 0x85b08d82 !fxor %f2,%f2,%f2 | |
266 | .word 0x93410000 !rd %tick,%o1 | |
267 | .word 0x81b00d80 !fxor %f0,%f0,%f0 | |
268 | .word 0x85b08d82 !fxor %f2,%f2,%f2 | |
269 | .word 0x95410000 !rd %tick,%o2 | |
270 | .word 0x81b00d80 !fxor %f0,%f0,%f0 | |
271 | .word 0x85b08d82 !fxor %f2,%f2,%f2 | |
272 | .word 0x97410000 !rd %tick,%o3 | |
273 | .word 0x81b00d80 !fxor %f0,%f0,%f0 | |
274 | .word 0x85b08d82 !fxor %f2,%f2,%f2 | |
275 | .word 0x99410000 !rd %tick,%o4 | |
276 | ||
277 | ! calculate intervals | |
278 | sub %o1,%o0,%o0 ! four consecutive %tick differences end up in %o0..%o3 | |
279 | sub %o2,%o1,%o1 | |
280 | sub %o3,%o2,%o2 | |
281 | sub %o4,%o3,%o3 | |
282 | ||
283 | ! find minimum value | |
284 | cmp %o0,%o1 | |
285 | .word 0x38680002 !bgu,a %xcc,.+8 | |
286 | mov %o1,%o0 ! annulled delay slot: runs only when %o0 > %o1 (unsigned) | |
287 | cmp %o0,%o2 | |
288 | .word 0x38680002 !bgu,a %xcc,.+8 | |
289 | mov %o2,%o0 ! same pattern, against %o2 | |
290 | cmp %o0,%o3 | |
291 | .word 0x38680002 !bgu,a %xcc,.+8 | |
292 | mov %o3,%o0 ! same pattern, against %o3; %o0 now holds the smallest interval | |
293 | ||
40b6d493 | 294 | retl |
387ed39f AP |
295 | nop |
296 | .type _sparcv9_vis1_instrument,#function | |
297 | .size _sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument | |
298 | ||
299 | .global _sparcv9_vis2_probe | |
300 | .align 8 | |
301 | _sparcv9_vis2_probe: ! executes one hand-encoded VIS2 instruction and returns | |
302 | retl | |
303 | .word 0x81b00980 !bshuffle %f0,%f0,%f0 | |
304 | .type _sparcv9_vis2_probe,#function | |
305 | .size _sparcv9_vis2_probe,.-_sparcv9_vis2_probe | |
306 | ||
307 | .global _sparcv9_fmadd_probe | |
308 | .align 8 | |
309 | _sparcv9_fmadd_probe: ! zeroes %f0 and %f2, then executes one hand-encoded fmaddd and returns | |
310 | .word 0x81b00d80 !fxor %f0,%f0,%f0 | |
311 | .word 0x85b08d82 !fxor %f2,%f2,%f2 | |
312 | retl | |
313 | .word 0x81b80440 !fmaddd %f0,%f0,%f2,%f0 | |
314 | .type _sparcv9_fmadd_probe,#function | |
315 | .size _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe | |
40b6d493 | 316 | |
b2dba9bf AP |
317 | .global OPENSSL_cleanse |
318 | .align 32 | |
319 | OPENSSL_cleanse: ! in: %o0 = buffer address, %o1 = length in bytes; stores zero over the whole buffer | |
aa5c99fa | 320 | cmp %o1,14 |
b2dba9bf AP |
321 | nop |
322 | #ifdef ABI64 | |
323 | bgu %xcc,.Lot ! more than 14 bytes: take the aligned wide-store path | |
324 | #else | |
325 | bgu .Lot ! more than 14 bytes: take the aligned wide-store path | |
326 | #endif | |
1d8fa09c DSH |
327 | cmp %o1,0 ! sits in the delay slot of the bgu above |
328 | bne .Little | |
329 | nop ! delay slot | |
330 | retl ! zero length: nothing to do | |
b2dba9bf AP |
331 | nop |
332 | ||
333 | .Little: ! byte loop, entered with %o1 not zero | |
334 | stb %g0,[%o0] | |
335 | subcc %o1,1,%o1 | |
336 | bnz .Little | |
337 | add %o0,1,%o0 ! delay slot: advance the pointer | |
338 | retl | |
339 | nop | |
340 | .align 32 | |
341 | .Lot: | |
aa5c99fa AP |
342 | #ifndef ABI64 |
343 | subcc %g0,1,%g1 | |
344 | ! see above for explanation | |
345 | .word 0x83408000 !rd %ccr,%g1 | |
346 | cmp %g1,0x99 | |
347 | bne .v8lot ! not V9: use 4-byte stores instead of stx | |
348 | nop ! delay slot | |
349 | #endif | |
350 | ||
351 | .v9lot: andcc %o0,7,%g0 ! is the pointer 8-byte aligned yet? | |
352 | bz .v9aligned | |
353 | nop ! delay slot | |
354 | stb %g0,[%o0] ! no: clear one byte and test again | |
355 | sub %o1,1,%o1 | |
356 | ba .v9lot | |
357 | add %o0,1,%o0 ! delay slot | |
358 | .align 16,0x01000000 | |
359 | .v9aligned: | |
360 | .word 0xc0720000 !stx %g0,[%o0] | |
361 | sub %o1,8,%o1 | |
362 | andcc %o1,-8,%g0 ! any bits above the low three set means at least 8 bytes remain | |
363 | #ifdef ABI64 | |
364 | .word 0x126ffffd !bnz %xcc,.v9aligned | |
365 | #else | |
366 | .word 0x124ffffd !bnz %icc,.v9aligned | |
367 | #endif | |
368 | add %o0,8,%o0 ! delay slot of the hand-encoded branch; its displacement is fixed, so keep this loop layout intact | |
369 | ||
370 | cmp %o1,0 ! leftover bytes go through the byte loop | |
371 | bne .Little | |
372 | nop | |
373 | retl | |
374 | nop | |
375 | #ifndef ABI64 | |
376 | .v8lot: andcc %o0,3,%g0 ! is the pointer 4-byte aligned yet? | |
377 | bz .v8aligned | |
b2dba9bf AP |
378 | nop |
379 | stb %g0,[%o0] ! no: clear one byte and test again | |
380 | sub %o1,1,%o1 | |
aa5c99fa | 381 | ba .v8lot |
b2dba9bf AP |
382 | add %o0,1,%o0 |
383 | nop | |
aa5c99fa | 384 | .v8aligned: |
b2dba9bf AP |
385 | st %g0,[%o0] |
386 | sub %o1,4,%o1 | |
387 | andcc %o1,-4,%g0 ! any bits above the low two set means at least 4 bytes remain | |
aa5c99fa | 388 | bnz .v8aligned |
b2dba9bf AP |
389 | add %o0,4,%o0 |
390 | ||
391 | cmp %o1,0 ! leftover bytes go through the byte loop | |
392 | bne .Little | |
393 | nop | |
394 | retl | |
395 | nop | |
aa5c99fa | 396 | #endif |
b2dba9bf AP |
397 | .type OPENSSL_cleanse,#function |
398 | .size OPENSSL_cleanse,.-OPENSSL_cleanse | |
399 | ||
c06b0f3d AP |
400 | .section ".init",#alloc,#execinstr |
401 | call OPENSSL_cpuid_setup ! emitted into the .init section; NOTE(review): presumably so the setup runs when the object is loaded, confirm | |
cee73df3 | 402 | nop ! delay slot |