]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/ia64cpuid.S
f942648bae35f77faa5488a6be7d0e78d5fde305
[thirdparty/openssl.git] / crypto / ia64cpuid.S
1 // Works on all IA-64 platforms: Linux, HP-UX, Win64i...
2 // On Win64i compile with ias.exe.
3 .text
4
// ADDP is the instruction the routines below apply to an incoming
// pointer argument before dereferencing it (always in the form
// "ADDP dst=0,src"). It expands to addp4 when _HPUX_SOURCE is defined
// and _LP64 is not, and to a plain add, i.e. a register copy, in every
// other configuration.
5 #if defined(_HPUX_SOURCE) && !defined(_LP64)
6 #define ADDP addp4
7 #else
8 #define ADDP add
9 #endif
10
// OPENSSL_cpuid_setup: empty in this file. The body is a single
// return through b0; no register or memory is read or written.
11 .global OPENSSL_cpuid_setup#
12 .proc OPENSSL_cpuid_setup#
13 OPENSSL_cpuid_setup:
14 { .mib; br.ret.sptk.many b0 };;
15 .endp OPENSSL_cpuid_setup#
16
// OPENSSL_rdtsc: copies the ar.itc application register into r8 and
// returns. Takes no arguments and touches nothing else.
17 .global OPENSSL_rdtsc#
18 .proc OPENSSL_rdtsc#
19 OPENSSL_rdtsc:
20 { .mib; mov r8=ar.itc // r8 = current ar.itc value
21 br.ret.sptk.many b0 };;
22 .endp OPENSSL_rdtsc#
23
// OPENSSL_atomic_add: atomically adds r33 to the 32-bit word addressed
// by r32 and returns the updated value.
//   In:  r32 = address of the 32-bit word, r33 = addend
//   Out: r8  = value that was stored, sign-extended from 32 bits
//   Scratch: r2, r3, ar.ccv, p6 (r32 is rewritten by ADDP)
// The update is a compare-and-exchange loop: the word is read, the sum
// is offered to cmpxchg4, and the attempt is repeated whenever the word
// was found to hold something other than the value the sum was based on.
24 .global OPENSSL_atomic_add#
25 .proc OPENSSL_atomic_add#
26 .align 32
27 OPENSSL_atomic_add:
// Pass the incoming pointer through ADDP before the first access, the
// same way every other pointer-taking routine in this file does. The
// stop after slot 0 makes the converted r32 visible to the ld4.
28 { .mmi; ADDP r32=0,r32;;
29 ld4 r2=[r32] // r2 = current contents of the word
30 nop.i 0 };;
31 .Lspin:
32 { .mii; mov ar.ccv=r2 // value cmpxchg4 has to find in memory
33 add r8=r2,r33 // candidate new value
34 mov r3=r2 };; // keep the expected value for the check below
35 { .mmi; mf;;
36 cmpxchg4.acq r2=[r32],r8,ar.ccv // r2 = value actually found in memory
37 nop.i 0 };;
38 { .mib; cmp.ne p6,p0=r2,r3 // found something other than expected?
39 nop.i 0
40 (p6) br.dpnt .Lspin };; // retry; r2 already holds the fresh value
41 { .mib; nop.m 0
42 sxt4 r8=r8 // return the sum as a signed 32-bit quantity
43 br.ret.sptk.many b0 };;
44 .endp OPENSSL_atomic_add#
45
46 // Returns a structure comprising pointer to the top of stack of
47 // the caller and pointer beyond backing storage for the current
48 // register frame. The latter is required, because it might be
49 // insufficient to wipe backing storage for the current frame
50 // (as this procedure does), one might have to go further, toward
51 // higher addresses to reach for whole "retroactively" saved
52 // context...
//
// Register roles, as visible in the code below:
//   r2 = ar.pfs as delivered by alloc, r3 = saved ar.lc,
//   r8 = saved predicates until the exit path, then sp (first result),
//   r9 = store cursor seeded from ar.bsp (second result).
// Wiped: the 96-register frame r32-r127 and f127 on every rotation
// (loop), then r11, r14-r31 and f6-f31 (straight-line code).
// NOTE(review): f16-f31 are overwritten here without being saved;
// confirm against the platform calling convention that callers do not
// expect them to survive the call.
53 .global OPENSSL_wipe_cpu#
54 .proc OPENSSL_wipe_cpu#
55 .align 32
56 OPENSSL_wipe_cpu:
57 .prologue
58 .fframe 0
59 .save ar.pfs,r2
60 .save ar.lc,r3
61 { .mib; alloc r2=ar.pfs,0,96,0,96 // 0 in, 96 local, 0 out, 96 rotating: r32-r127
62 mov r3=ar.lc // ar.lc is overwritten below, keep the old value
63 brp.loop.imp .L_wipe_top,.L_wipe_end-16 // prediction hint for the loop
64 };;
65 { .mii; mov r9=ar.bsp // r9 = ar.bsp as seen after the alloc
66 mov r8=pr // keep predicates, restored on exit
67 mov ar.lc=96 };; // loop counter for br.ctop
68 .body
69 { .mii; add r9=96*8-8,r9 // cursor starts 96*8-8 bytes above ar.bsp
70 mov ar.ec=1 };; // single-stage loop, no extra epilogue passes
71
72 // One can sweep double as fast, but then we cannot guarantee
73 // that backing storage is wiped...
// Each pass zeroes one 8-byte memory slot (cursor moves down by 8) and
// writes zero to r127 and f127; br.ctop rotates the register files, so
// the same names reach a different register on the next pass.
74 .L_wipe_top:
75 { .mfi; st8 [r9]=r0,-8
76 mov f127=f0
77 mov r127=r0 }
78 { .mfb; nop.m 0
79 nop.f 0
80 br.ctop.sptk .L_wipe_top };;
81 .L_wipe_end:
82
// Straight-line wipe of the remaining registers: r11, r14-r31, f6-f31.
83 { .mfi; mov r11=r0
84 mov f6=f0
85 mov r14=r0 }
86 { .mfi; mov r15=r0
87 mov f7=f0
88 mov r16=r0 }
89 { .mfi; mov r17=r0
90 mov f8=f0
91 mov r18=r0 }
92 { .mfi; mov r19=r0
93 mov f9=f0
94 mov r20=r0 }
95 { .mfi; mov r21=r0
96 mov f10=f0
97 mov r22=r0 }
98 { .mfi; mov r23=r0
99 mov f11=f0
100 mov r24=r0 }
101 { .mfi; mov r25=r0
102 mov f12=f0
103 mov r26=r0 }
104 { .mfi; mov r27=r0
105 mov f13=f0
106 mov r28=r0 }
107 { .mfi; mov r29=r0
108 mov f14=f0
109 mov r30=r0 }
110 { .mfi; mov r31=r0
111 mov f15=f0
112 nop.i 0 }
113 { .mfi; mov f16=f0 }
114 { .mfi; mov f17=f0 }
115 { .mfi; mov f18=f0 }
116 { .mfi; mov f19=f0 }
117 { .mfi; mov f20=f0 }
118 { .mfi; mov f21=f0 }
119 { .mfi; mov f22=f0 }
120 { .mfi; mov f23=f0 }
121 { .mfi; mov f24=f0 }
122 { .mfi; mov f25=f0 }
123 { .mfi; mov f26=f0 }
124 { .mfi; mov f27=f0 }
125 { .mfi; mov f28=f0 }
126 { .mfi; mov f29=f0 }
127 { .mfi; mov f30=f0 }
128 { .mfi; add r9=96*8+8,r9 // second result: cursor moved back up by 96*8+8
129 mov f31=f0
130 mov pr=r8,0x1ffff } // restore the predicates saved at entry
131 { .mib; mov r8=sp // first result: stack pointer
132 mov ar.lc=r3 // restore the loop counter saved at entry
133 br.ret.sptk b0 };;
134 .endp OPENSSL_wipe_cpu#
135
// OPENSSL_cleanse: overwrites a buffer with zero bytes.
//   In:  r32 = buffer address (converted with ADDP), r33 = length in bytes
//   Scratch: r2, p6, p7; r32 and r33 are consumed as cursor and counter.
// A zero length returns at once. Lengths below 15 are cleared one byte
// at a time (.Little). Longer buffers are first advanced byte by byte
// until the address is a multiple of 8 (.Lot), then cleared with 8-byte
// stores (.Laligned); whatever is left over is handed back to .Little.
136 .global OPENSSL_cleanse#
137 .proc OPENSSL_cleanse#
138 OPENSSL_cleanse:
139 { .mib; cmp.eq p6,p0=0,r33 // len==0
140 ADDP r32=0,r32
141 (p6) br.ret.spnt b0 };;
142 { .mib; and r2=7,r32 // r2 = misalignment of the address
143 cmp.leu p6,p0=15,r33 // len>=15
144 (p6) br.cond.dptk .Lot };;
145
// Byte loop: one zero byte per pass, returns when the length runs out.
146 .Little:
147 { .mib; st1 [r32]=r0,1
148 cmp.ltu p6,p7=1,r33 } // len>1
149 { .mbb; add r33=-1,r33 // len--
150 (p6) br.cond.dptk .Little
151 (p7) br.ret.sptk.many b0 };;
152
// Alignment loop: zero single bytes until the low three address bits
// are clear. r2 is recomputed from the already advanced address.
153 .Lot:
154 { .mib; cmp.eq p6,p0=0,r2
155 (p6) br.cond.dptk .Laligned };;
156 { .mmi; st1 [r32]=r0,1;;
157 and r2=7,r32 }
158 { .mib; add r33=-1,r33
159 br .Lot };;
160
// Word loop: r2 is taken from the length as it was before this pass
// subtracted 8, which is what the comparison comment below refers to.
161 .Laligned:
162 { .mmi; st8 [r32]=r0,8
163 and r2=-8,r33 // len&~7
164 add r33=-8,r33 };; // len-=8
165 { .mib; cmp.ltu p6,p0=8,r2 // ((len+8)&~7)>8
166 (p6) br.cond.dptk .Laligned };;
167
// Done when nothing is left, otherwise finish the tail byte by byte.
168 { .mbb; cmp.eq p6,p7=r0,r33
169 (p7) br.cond.dpnt .Little
170 (p6) br.ret.sptk.many b0 };;
171 .endp OPENSSL_cleanse#
172
// CRYPTO_memcmp: compares two byte buffers without branching on their
// contents.
//   In:  r32 = first buffer, r33 = second buffer, r34 = length in bytes
//   Out: r8  = 0 when the length is 0 or every byte pair matches,
//              1 otherwise
//   Saved and restored: ar.lc (in r3), predicates (in r9).
// The loop always visits all bytes; the XOR of each byte pair is OR-ed
// into r8 and the result is reduced to 0 or 1 only after the loop.
173 .global CRYPTO_memcmp#
174 .proc CRYPTO_memcmp#
175 .align 32
176 .skip 16
177 CRYPTO_memcmp:
178 .prologue
179 { .mib; mov r8=0 // accumulator, also the result for len==0
180 cmp.eq p6,p0=0,r34 // len==0?
181 (p6) br.ret.spnt b0 };;
182 .save ar.pfs,r2
183 { .mib; alloc r2=ar.pfs,3,5,0,8 // 3 in + 5 local, all 8 (r32-r39) rotating
184 .save ar.lc,r3
185 mov r3=ar.lc
186 brp.loop.imp .Loop_cmp_ctop,.Loop_cmp_cend-16
187 }
188 { .mib; sub r10=r34,r0,1 // r10 = len-1, loaded into ar.lc below
189 .save pr,r9
190 mov r9=pr };;
// The arguments are copied out of r32/r33 because r32-r39 are reused
// as rotating registers inside the loop.
191 { .mii; ADDP r16=0,r32 // r16 = cursor into the first buffer
192 mov ar.lc=r10
193 mov ar.ec=4 } // four pipeline stages: p16..p19
194 { .mib; ADDP r17=0,r33 // r17 = cursor into the second buffer
195 mov pr.rot=1<<16 };; // only stage predicate p16 set on entry
196
// Software-pipelined loop, registers rotate by one on every br.ctop:
//   stage p16: load one byte from each buffer into r32 and r36
//   stage p18: the same bytes are now named r34 and r38; XOR them
//   stage p19: the XOR result is now named r35; OR it into r8
197 .Loop_cmp_ctop:
198 { .mib; (p16) ld1 r32=[r16],1
199 (p18) xor r34=r34,r38 }
200 { .mib; (p16) ld1 r36=[r17],1
201 (p19) or r8=r8,r35
202 br.ctop.sptk .Loop_cmp_ctop };;
203 .Loop_cmp_cend:
204
205 { .mib; cmp.ne p6,p0=0,r8 // any differing bit collected?
206 mov ar.lc=r3 };;
207 { .mib;
208 (p6) mov r8=1 // normalize a non-zero accumulator to 1
209 mov pr=r9,0x1ffff
210 br.ret.sptk.many b0 };;
211 .endp CRYPTO_memcmp#
212
// OPENSSL_instrument_bus: walks an array of 32-bit words and, for each
// word, flushes its cache line with fc and adds to it the ar.itc
// difference measured since the previous sample.
//   In:  r32 = array address (converted with ADDP), r33 = cnt
//   Out: r8  = initial cnt minus the value left in r33
//   Scratch: r2, r3, r9, r10, ar.ccv, p6
// The addition is a single ld4 + cmpxchg4 attempt; the cmpxchg4 result
// in r3 is never inspected, so a failed exchange is not retried.
// NOTE(review): cnt is decremented before it is tested, so a cnt of 0
// on entry is not treated as "nothing to do" - confirm callers never
// pass 0.
213 .global OPENSSL_instrument_bus#
214 .proc OPENSSL_instrument_bus#
215 OPENSSL_instrument_bus:
216 { .mmi; mov r2=r33 // put aside cnt for the return value
217 ADDP r32=0,r32 }
218 { .mmi; mov r8=ar.itc;;
219 mov r10=r0 // diff=0
220 mov r9=r8 };; // lasttick=tick
221
// Same flush/load/add/exchange sequence as the loop body, run once on
// the first word with diff equal to 0 before the loop is entered.
222 { .mmi; fc r32;;
223 ld4 r8=[r32] };;
224 { .mmi; mf
225 mov ar.ccv=r8
226 add r8=r8,r10 };;
227 { .mmi; cmpxchg4.acq r3=[r32],r8,ar.ccv
228 };;
229 .Loop:
230 { .mmi; mov r8=ar.itc;;
231 sub r10=r8,r9 // diff=tick-lasttick
232 mov r9=r8 };; // lasttick=tick
233 { .mmi; fc r32;; // flush the cache line holding *out
234 ld4 r8=[r32] };; // r8 = *out
235 { .mmi; mf
236 mov ar.ccv=r8 // expected value for cmpxchg4
237 add r8=r8,r10 };; // r8 = *out + diff
238 { .mmi; cmpxchg4.acq r3=[r32],r8,ar.ccv // store the sum if *out is unchanged
239 add r33=-1,r33 // --cnt
240 add r32=4,r32 };; // ++out
241 { .mib; cmp4.ne p6,p0=0,r33 // 32-bit test: cnt!=0
242 (p6) br.cond.dptk .Loop };;
243
244 { .mib; sub r8=r2,r33 // return initial cnt - remaining cnt
245 br.ret.sptk.many b0 };;
246 .endp OPENSSL_instrument_bus#
247
// OPENSSL_instrument_bus2: like OPENSSL_instrument_bus, but the output
// cursor and cnt only advance when the newly measured ar.itc difference
// differs from the previous one, and the whole run is bounded by max
// passes.
//   In:  r32 = array address (converted with ADDP), r33 = cnt, r34 = max
//   Out: r8  = initial cnt minus the value left in r33
//   Scratch: r2, r3, r9, r10, r11, ar.ccv, p6, p7
// As in OPENSSL_instrument_bus, each addition is a single ld4 +
// cmpxchg4 attempt whose result (r3) is never inspected.
// NOTE(review): max is decremented before it is tested, so a max of 0
// on entry does not stop the loop on the first pass - confirm callers
// never pass 0.
248 .global OPENSSL_instrument_bus2#
249 .proc OPENSSL_instrument_bus2#
250 OPENSSL_instrument_bus2:
251 { .mmi; mov r2=r33 // put aside cnt
252 ADDP r32=0,r32 }
253 { .mmi; mov r8=ar.itc;;
254 mov r10=r0 // diff=0
255 mov r9=r8 };; // lasttick=tick
256
// One flush/load/add/exchange on the first word with diff equal to 0.
257 { .mmi; fc r32;;
258 ld4 r8=[r32] };;
259 { .mmi; mf
260 mov ar.ccv=r8
261 add r8=r8,r10 };;
262 { .mmi; cmpxchg4.acq r3=[r32],r8,ar.ccv
263 };;
264
// First real sample, so that diff is meaningful when the loop starts.
265 { .mmi; mov r8=ar.itc;;
266 sub r10=r8,r9 // diff=tick-lasttick
267 mov r9=r8 };; // lasttick=tick
268 .Loop2:
269 { .mmi; mov r11=r10 // lastdiff=diff
270 add r34=-1,r34 };; // --max
271 { .mmi; fc r32;; // flush the cache line holding *out
272 ld4 r8=[r32] // r8 = *out
273 cmp4.eq p6,p0=0,r34 };; // 32-bit test: max==0
274 { .mmi; mf
275 mov ar.ccv=r8 // expected value for cmpxchg4
276 add r8=r8,r10 };; // r8 = *out + diff
277 { .mmb; cmpxchg4.acq r3=[r32],r8,ar.ccv
278 (p6) br.cond.spnt .Ldone2 };; // budget exhausted: stop after this update
279
280 { .mmi; mov r8=ar.itc;;
281 sub r10=r8,r9 // diff=tick-lasttick
282 mov r9=r8 };; // lasttick=tick
283 { .mmi; cmp.ne p6,p0=r10,r11;; // diff!=lastdiff
284 (p6) add r33=-1,r33 };; // conditional --cnt
285 { .mib; cmp4.ne p7,p0=0,r33 // 32-bit test: cnt!=0
286 (p6) add r32=4,r32 // conditional ++out
287 (p7) br.cond.dptk .Loop2 };;
288 .Ldone2:
289 { .mib; sub r8=r2,r33 // return initial cnt - remaining cnt
290 br.ret.sptk.many b0 };;
291 .endp OPENSSL_instrument_bus2#