// gcc/config/ia64/lib1funcs.asm
// IA-64 (Itanium) libgcc support routines: software floating-point
// quotients, integer division/modulus helpers, and stack / nonlocal-goto /
// trampoline support.
#ifdef L__divtf3
// Compute a 80-bit IEEE double-extended quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// farg0 holds the dividend.  farg1 holds the divisor.

	.text
	.align 16
	.global __divtf3
	.proc __divtf3
__divtf3:
	// p7 starts true; frcpa sets p6 when refinement is required and
	// delivers the initial reciprocal approximation y0 in f10.
	cmp.eq p7, p0 = r0, r0
	frcpa.s0 f10, p6 = farg0, farg1
	;;
	// p7 <- !p6, so exactly one of the two predicates is set.
(p6)	cmp.ne p7, p0 = r0, r0
	.pred.rel.mutex p6, p7
(p6)	fnma.s1 f11 = farg1, f10, f1	// f11 = e = 1 - b*y0
(p6)	fmpy.s1 f12 = farg0, f10	// f12 = q0 = a*y0
	;;
(p6)	fmpy.s1 f13 = f11, f11		// f13 = e^2
(p6)	fma.s1 f14 = f11, f11, f11	// f14 = e^2 + e
	;;
(p6)	fma.s1 f11 = f13, f13, f11	// f11 = e^4 + e
(p6)	fma.s1 f13 = f14, f10, f10	// f13 = y0*(1 + e + e^2)
	;;
(p6)	fma.s1 f10 = f13, f11, f10	// f10 = y1, reciprocal good to e^7
(p6)	fnma.s1 f11 = farg1, f12, farg0	// f11 = r0 = a - b*q0
	;;
(p6)	fma.s1 f11 = f11, f10, f12	// f11 = q1 = q0 + r0*y1
(p6)	fnma.s1 f13 = farg1, f10, f1	// f13 = 1 - b*y1
	;;
(p6)	fma.s1 f10 = f13, f10, f10	// f10 = y2, refined reciprocal
(p6)	fnma.s1 f12 = farg1, f11, farg0	// f12 = r1 = a - b*q1
	;;
	// Final fma rounds q1 + r1*y2 in the caller's dynamic rounding mode.
(p6)	fma fret0 = f12, f10, f11
(p7)	mov fret0 = f10			// frcpa already produced the result
	br.ret.sptk rp
	;;
	.endp __divtf3
#endif

#ifdef L__divdf3
// Compute a 64-bit IEEE double quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// farg0 holds the dividend.  farg1 holds the divisor.

	.text
	.align 16
	.global __divdf3
	.proc __divdf3
__divdf3:
	// p7 starts true; frcpa sets p6 when refinement is required and
	// delivers the initial reciprocal approximation y0 in f10.
	cmp.eq p7, p0 = r0, r0
	frcpa.s0 f10, p6 = farg0, farg1
	;;
	// p7 <- !p6, so exactly one of the two predicates is set.
(p6)	cmp.ne p7, p0 = r0, r0
	.pred.rel.mutex p6, p7
(p6)	fmpy.s1 f11 = farg0, f10	// f11 = q0 = a*y0
(p6)	fnma.s1 f12 = farg1, f10, f1	// f12 = e = 1 - b*y0
	;;
(p6)	fma.s1 f11 = f12, f11, f11	// q1 = q0*(1 + e)
(p6)	fmpy.s1 f13 = f12, f12		// f13 = e^2
	;;
(p6)	fma.s1 f10 = f12, f10, f10	// y1 = y0*(1 + e)
(p6)	fma.s1 f11 = f13, f11, f11	// q2 = q1*(1 + e^2)
	;;
(p6)	fmpy.s1 f12 = f13, f13		// f12 = e^4
(p6)	fma.s1 f10 = f13, f10, f10	// y2 = y1*(1 + e^2)
	;;
(p6)	fma.d.s1 f11 = f12, f11, f11	// q3 = q2*(1 + e^4), rounded to double
(p6)	fma.s1 f10 = f12, f10, f10	// y3 = y2*(1 + e^4)
	;;
(p6)	fnma.d.s1 f8 = farg1, f11, farg0 // f8 = r = a - b*q3
	;;
	// Final fma rounds q3 + r*y3 to double in the dynamic rounding mode.
(p6)	fma.d fret0 = f8, f10, f11
(p7)	mov fret0 = f10			// frcpa already produced the result
	br.ret.sptk rp
	;;
	.endp __divdf3
#endif

#ifdef L__divsf3
// Compute a 32-bit IEEE float quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// farg0 holds the dividend.  farg1 holds the divisor.

	.text
	.align 16
	.global __divsf3
	.proc __divsf3
__divsf3:
	// p7 starts true; frcpa sets p6 when refinement is required and
	// delivers the initial reciprocal approximation y0 in f10.
	cmp.eq p7, p0 = r0, r0
	frcpa.s0 f10, p6 = farg0, farg1
	;;
	// p7 <- !p6, so exactly one of the two predicates is set.
(p6)	cmp.ne p7, p0 = r0, r0
	.pred.rel.mutex p6, p7
(p6)	fmpy.s1 f8 = farg0, f10		// f8 = q0 = a*y0
(p6)	fnma.s1 f9 = farg1, f10, f1	// f9 = e = 1 - b*y0
	;;
(p6)	fma.s1 f8 = f9, f8, f8		// q1 = q0*(1 + e)
(p6)	fmpy.s1 f9 = f9, f9		// f9 = e^2
	;;
(p6)	fma.s1 f8 = f9, f8, f8		// q2 = q1*(1 + e^2)
(p6)	fmpy.s1 f9 = f9, f9		// f9 = e^4
	;;
(p6)	fma.d.s1 f10 = f9, f8, f8	// q3 = q2*(1 + e^4), double precision
	;;
(p6)	fnorm.s.s0 fret0 = f10		// round to single in caller's mode
(p7)	mov fret0 = f10			// frcpa already produced the result
	br.ret.sptk rp
	;;
	.endp __divsf3
#endif

#ifdef L__divdi3
// Compute a 64-bit integer quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend.  in1 holds the divisor.

	.text
	.align 16
	.global __divdi3
	.proc __divdi3
__divdi3:
	.regstk 2,0,0,0
	// Transfer inputs to FP registers.
	setf.sig f8 = in0
	setf.sig f9 = in1
	;;
	// Convert the inputs to FP, so that they won't be treated as unsigned.
	fcvt.xf f8 = f8
	fcvt.xf f9 = f9
	;;
	// Compute the reciprocal approximation.
	frcpa.s1 f10, p6 = f8, f9
	;;
	// 3 Newton-Raphson iterations.
(p6)	fnma.s1 f11 = f9, f10, f1	// f11 = e = 1 - b*y0
(p6)	fmpy.s1 f12 = f8, f10		// f12 = q0 = a*y0
	;;
(p6)	fmpy.s1 f13 = f11, f11		// f13 = e^2
(p6)	fma.s1 f12 = f11, f12, f12	// q1 = q0*(1 + e)
	;;
(p6)	fma.s1 f10 = f11, f10, f10	// y1 = y0*(1 + e)
(p6)	fma.s1 f11 = f13, f12, f12	// q2 = q1*(1 + e^2)
	;;
(p6)	fma.s1 f10 = f13, f10, f10	// y2 = y1*(1 + e^2)
(p6)	fnma.s1 f12 = f9, f11, f8	// f12 = r = a - b*q2
	;;
(p6)	fma.s1 f10 = f12, f10, f11	// q3 = q2 + r*y2
	;;
	// Round quotient to an integer.
	fcvt.fx.trunc.s1 f10 = f10
	;;
	// Transfer result to GP registers.
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __divdi3
#endif

#ifdef L__moddi3
// Compute a 64-bit integer modulus.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend (a).  in1 holds the divisor (b).

	.text
	.align 16
	.global __moddi3
	.proc __moddi3
__moddi3:
	.regstk 2,0,0,0
	// Transfer inputs to FP registers.  f14 keeps the original dividend
	// for the final remainder computation.
	setf.sig f14 = in0
	setf.sig f9 = in1
	;;
	// Convert the inputs to FP, so that they won't be treated as unsigned.
	fcvt.xf f8 = f14
	fcvt.xf f9 = f9
	;;
	// Compute the reciprocal approximation.
	frcpa.s1 f10, p6 = f8, f9
	;;
	// 3 Newton-Raphson iterations.
(p6)	fmpy.s1 f12 = f8, f10		// f12 = q0 = a*y0
(p6)	fnma.s1 f11 = f9, f10, f1	// f11 = e = 1 - b*y0
	;;
(p6)	fma.s1 f12 = f11, f12, f12	// q1 = q0*(1 + e)
(p6)	fmpy.s1 f13 = f11, f11		// f13 = e^2
	;;
(p6)	fma.s1 f10 = f11, f10, f10	// y1 = y0*(1 + e)
(p6)	fma.s1 f11 = f13, f12, f12	// q2 = q1*(1 + e^2)
	;;
	sub in1 = r0, in1		// in1 = -b, for the xma below
(p6)	fma.s1 f10 = f13, f10, f10	// y2 = y1*(1 + e^2)
(p6)	fnma.s1 f12 = f9, f11, f8	// f12 = r = a - b*q2
	;;
	setf.sig f9 = in1		// f9 = -b (integer significand)
(p6)	fma.s1 f10 = f12, f10, f11	// q3 = q2 + r*y2
	;;
	fcvt.fx.trunc.s1 f10 = f10	// q = trunc(a/b)
	;;
	// r = q * (-b) + a
	xma.l f10 = f10, f9, f14
	;;
	// Transfer result to GP registers.
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __moddi3
#endif

#ifdef L__udivdi3
// Compute a 64-bit unsigned integer quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend.  in1 holds the divisor.

	.text
	.align 16
	.global __udivdi3
	.proc __udivdi3
__udivdi3:
	.regstk 2,0,0,0
	// Transfer inputs to FP registers.
	setf.sig f8 = in0
	setf.sig f9 = in1
	;;
	// Convert the inputs to FP, to avoid FP software-assist faults.
	fcvt.xuf.s1 f8 = f8
	fcvt.xuf.s1 f9 = f9
	;;
	// Compute the reciprocal approximation.
	frcpa.s1 f10, p6 = f8, f9
	;;
	// 3 Newton-Raphson iterations.
(p6)	fnma.s1 f11 = f9, f10, f1	// f11 = e = 1 - b*y0
(p6)	fmpy.s1 f12 = f8, f10		// f12 = q0 = a*y0
	;;
(p6)	fmpy.s1 f13 = f11, f11		// f13 = e^2
(p6)	fma.s1 f12 = f11, f12, f12	// q1 = q0*(1 + e)
	;;
(p6)	fma.s1 f10 = f11, f10, f10	// y1 = y0*(1 + e)
(p6)	fma.s1 f11 = f13, f12, f12	// q2 = q1*(1 + e^2)
	;;
(p6)	fma.s1 f10 = f13, f10, f10	// y2 = y1*(1 + e^2)
(p6)	fnma.s1 f12 = f9, f11, f8	// f12 = r = a - b*q2
	;;
(p6)	fma.s1 f10 = f12, f10, f11	// q3 = q2 + r*y2
	;;
	// Round quotient to an unsigned integer.
	fcvt.fxu.trunc.s1 f10 = f10
	;;
	// Transfer result to GP registers.
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __udivdi3
#endif

#ifdef L__umoddi3
// Compute a 64-bit unsigned integer modulus.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend (a).  in1 holds the divisor (b).

	.text
	.align 16
	.global __umoddi3
	.proc __umoddi3
__umoddi3:
	.regstk 2,0,0,0
	// Transfer inputs to FP registers.  f14 keeps the original dividend
	// for the final remainder computation.
	setf.sig f14 = in0
	setf.sig f9 = in1
	;;
	// Convert the inputs to FP, to avoid FP software assist faults.
	fcvt.xuf.s1 f8 = f14
	fcvt.xuf.s1 f9 = f9
	;;
	// Compute the reciprocal approximation.
	frcpa.s1 f10, p6 = f8, f9
	;;
	// 3 Newton-Raphson iterations.
(p6)	fmpy.s1 f12 = f8, f10		// f12 = q0 = a*y0
(p6)	fnma.s1 f11 = f9, f10, f1	// f11 = e = 1 - b*y0
	;;
(p6)	fma.s1 f12 = f11, f12, f12	// q1 = q0*(1 + e)
(p6)	fmpy.s1 f13 = f11, f11		// f13 = e^2
	;;
(p6)	fma.s1 f10 = f11, f10, f10	// y1 = y0*(1 + e)
(p6)	fma.s1 f11 = f13, f12, f12	// q2 = q1*(1 + e^2)
	;;
	sub in1 = r0, in1		// in1 = -b, for the xma below
(p6)	fma.s1 f10 = f13, f10, f10	// y2 = y1*(1 + e^2)
(p6)	fnma.s1 f12 = f9, f11, f8	// f12 = r = a - b*q2
	;;
	setf.sig f9 = in1		// f9 = -b (integer significand)
(p6)	fma.s1 f10 = f12, f10, f11	// q3 = q2 + r*y2
	;;
	// Round quotient to an unsigned integer.
	fcvt.fxu.trunc.s1 f10 = f10
	;;
	// r = q * (-b) + a
	xma.l f10 = f10, f9, f14
	;;
	// Transfer result to GP registers.
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __umoddi3
#endif

#ifdef L__divsi3
// Compute a 32-bit integer quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend.  in1 holds the divisor.

	.text
	.align 16
	.global __divsi3
	.proc __divsi3
__divsi3:
	.regstk 2,0,0,0
	// Sign-extend the 32-bit inputs to 64 bits.
	sxt4 in0 = in0
	sxt4 in1 = in1
	;;
	setf.sig f8 = in0
	setf.sig f9 = in1
	;;
	// 0x0ffdd via setf.exp builds a tiny bias constant (2^-34, given the
	// IA-64 exponent bias of 0xffff) that guarantees correct truncation
	// after only one refinement step on 32-bit operands.
	mov r2 = 0x0ffdd
	fcvt.xf f8 = f8
	fcvt.xf f9 = f9
	;;
	setf.exp f11 = r2
	frcpa.s1 f10, p6 = f8, f9
	;;
(p6)	fmpy.s1 f8 = f8, f10		// f8 = q0 = a*y0
(p6)	fnma.s1 f9 = f9, f10, f1	// f9 = e = 1 - b*y0
	;;
(p6)	fma.s1 f8 = f9, f8, f8		// q1 = q0*(1 + e)
(p6)	fma.s1 f9 = f9, f9, f11		// f9 = e^2 + 2^-34
	;;
(p6)	fma.s1 f10 = f9, f8, f8		// q2 = q1*(1 + e^2 + 2^-34)
	;;
	fcvt.fx.trunc.s1 f10 = f10	// truncate quotient to integer
	;;
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __divsi3
#endif

#ifdef L__modsi3
// Compute a 32-bit integer modulus.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend.  in1 holds the divisor.

	.text
	.align 16
	.global __modsi3
	.proc __modsi3
__modsi3:
	.regstk 2,0,0,0
	// 0x0ffdd via setf.exp builds a tiny bias constant (2^-34, given the
	// IA-64 exponent bias of 0xffff); see __divsi3.
	mov r2 = 0x0ffdd
	sxt4 in0 = in0
	sxt4 in1 = in1
	;;
	setf.sig f13 = r32		// f13 keeps the dividend (r32 == in0)
	setf.sig f9 = r33
	;;
	sub in1 = r0, in1		// in1 = -b, for the xma below
	fcvt.xf f8 = f13
	fcvt.xf f9 = f9
	;;
	setf.exp f11 = r2
	frcpa.s1 f10, p6 = f8, f9
	;;
(p6)	fmpy.s1 f12 = f8, f10		// f12 = q0 = a*y0
(p6)	fnma.s1 f10 = f9, f10, f1	// f10 = e = 1 - b*y0
	;;
	setf.sig f9 = in1		// f9 = -b (integer significand)
(p6)	fma.s1 f12 = f10, f12, f12	// q1 = q0*(1 + e)
(p6)	fma.s1 f10 = f10, f10, f11	// f10 = e^2 + 2^-34
	;;
(p6)	fma.s1 f10 = f10, f12, f12	// q2 = q1*(1 + e^2 + 2^-34)
	;;
	fcvt.fx.trunc.s1 f10 = f10	// q = trunc(a/b)
	;;
	// r = q * (-b) + a
	xma.l f10 = f10, f9, f13
	;;
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __modsi3
#endif

#ifdef L__udivsi3
// Compute a 32-bit unsigned integer quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend.  in1 holds the divisor.

	.text
	.align 16
	.global __udivsi3
	.proc __udivsi3
__udivsi3:
	.regstk 2,0,0,0
	// 0x0ffdd via setf.exp builds a tiny bias constant (2^-34, given the
	// IA-64 exponent bias of 0xffff); see __divsi3.
	mov r2 = 0x0ffdd
	zxt4 in0 = in0
	zxt4 in1 = in1
	;;
	setf.sig f8 = in0
	setf.sig f9 = in1
	;;
	fcvt.xf f8 = f8
	fcvt.xf f9 = f9
	;;
	setf.exp f11 = r2
	frcpa.s1 f10, p6 = f8, f9
	;;
(p6)	fmpy.s1 f8 = f8, f10		// f8 = q0 = a*y0
(p6)	fnma.s1 f9 = f9, f10, f1	// f9 = e = 1 - b*y0
	;;
(p6)	fma.s1 f8 = f9, f8, f8		// q1 = q0*(1 + e)
(p6)	fma.s1 f9 = f9, f9, f11		// f9 = e^2 + 2^-34
	;;
(p6)	fma.s1 f10 = f9, f8, f8		// q2 = q1*(1 + e^2 + 2^-34)
	;;
	fcvt.fxu.trunc.s1 f10 = f10	// truncate quotient to unsigned integer
	;;
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __udivsi3
#endif

#ifdef L__umodsi3
// Compute a 32-bit unsigned integer modulus.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend.  in1 holds the divisor.

	.text
	.align 16
	.global __umodsi3
	.proc __umodsi3
__umodsi3:
	.regstk 2,0,0,0
	// 0x0ffdd via setf.exp builds a tiny bias constant (2^-34, given the
	// IA-64 exponent bias of 0xffff); see __divsi3.
	mov r2 = 0x0ffdd
	zxt4 in0 = in0
	zxt4 in1 = in1
	;;
	setf.sig f13 = in0		// f13 keeps the dividend
	setf.sig f9 = in1
	;;
	sub in1 = r0, in1		// in1 = -b, for the xma below
	fcvt.xf f8 = f13
	fcvt.xf f9 = f9
	;;
	setf.exp f11 = r2
	frcpa.s1 f10, p6 = f8, f9
	;;
(p6)	fmpy.s1 f12 = f8, f10		// f12 = q0 = a*y0
(p6)	fnma.s1 f10 = f9, f10, f1	// f10 = e = 1 - b*y0
	;;
	setf.sig f9 = in1		// f9 = -b (integer significand)
(p6)	fma.s1 f12 = f10, f12, f12	// q1 = q0*(1 + e)
(p6)	fma.s1 f10 = f10, f10, f11	// f10 = e^2 + 2^-34
	;;
(p6)	fma.s1 f10 = f10, f12, f12	// q2 = q1*(1 + e^2 + 2^-34)
	;;
	fcvt.fxu.trunc.s1 f10 = f10	// q = trunc(a/b), unsigned
	;;
	// r = q * (-b) + a
	xma.l f10 = f10, f9, f13
	;;
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __umodsi3
#endif

#ifdef L__save_stack_nonlocal
// Notes on save/restore stack nonlocal: We read ar.bsp but write
// ar.bspstore.  This is because ar.bsp can be read at all times
// (independent of the RSE mode) but since it's read-only we need to
// restore the value via ar.bspstore.  This is OK because
// ar.bsp==ar.bspstore after executing "flushrs".

// void __ia64_save_stack_nonlocal(void *save_area, void *stack_pointer)

	.text
	.align 16
	.global __ia64_save_stack_nonlocal
	.proc __ia64_save_stack_nonlocal
__ia64_save_stack_nonlocal:
	{ .mmf
	  alloc r18 = ar.pfs, 2, 0, 0, 0
	  mov r19 = ar.rsc			// save RSC mode bits
	  ;;
	}
	{ .mmi
	  flushrs				// make ar.bsp == ar.bspstore
	  st8 [in0] = in1, 24			// save_area[0] = stack_pointer
	  and r19 = 0x1c, r19			// RSC with mode bits cleared
	  ;;
	}
	{ .mmi
	  st8 [in0] = r18, -16			// save_area[3] = ar.pfs
	  mov ar.rsc = r19			// put RSE in enforced-lazy mode
	  or r19 = 0x3, r19			// original mode, to restore below
	  ;;
	}
	{ .mmi
	  mov r16 = ar.bsp
	  mov r17 = ar.rnat
	  adds r2 = 8, in0
	  ;;
	}
	{ .mmi
	  st8 [in0] = r16			// save_area[1] = ar.bsp
	  st8 [r2] = r17			// save_area[2] = ar.rnat
	}
	{ .mib
	  mov ar.rsc = r19			// restore RSE mode
	  br.ret.sptk.few rp
	  ;;
	}
	.endp __ia64_save_stack_nonlocal
#endif

#ifdef L__nonlocal_goto
// void __ia64_nonlocal_goto(void *target_label, void *save_area,
//			     void *static_chain);

	.text
	.align 16
	.global __ia64_nonlocal_goto
	.proc __ia64_nonlocal_goto
__ia64_nonlocal_goto:
	{ .mmi
	  alloc r20 = ar.pfs, 3, 0, 0, 0
	  ld8 r12 = [in1], 8			// restore stack pointer
	  mov.ret.sptk rp = in0, .L0		// branch target = target_label
	  ;;
	}
	{ .mmf
	  ld8 r16 = [in1], 8			// saved ar.bsp
	  mov r19 = ar.rsc			// save RSC mode bits
	  ;;
	}
	{ .mmi
	  flushrs				// make ar.bsp == ar.bspstore
	  ld8 r17 = [in1], 8			// saved ar.rnat
	  and r19 = 0x1c, r19			// RSC with mode bits cleared
	  ;;
	}
	{ .mmi
	  ld8 r18 = [in1]			// saved ar.pfs
	  mov ar.rsc = r19			// put RSE in enforced-lazy mode
	  or r19 = 0x3, r19			// original mode, to restore below
	  ;;
	}
	{ .mmi
	  mov ar.bspstore = r16			// rewind backing store
	  ;;
	  mov ar.rnat = r17
	  ;;
	}
	{ .mmi
	  loadrs				// invalidate dirty stacked regs
	  invala
	  mov r15 = in2				// static chain register
	  ;;
	}
.L0:	{ .mib
	  mov ar.rsc = r19			// restore RSE mode
	  mov ar.pfs = r18
	  br.ret.sptk.few rp			// "return" to target_label
	  ;;
	}
	.endp __ia64_nonlocal_goto
#endif


#ifdef L__restore_stack_nonlocal
// This is mostly the same as nonlocal_goto above.
// ??? This has not been tested yet.

// void __ia64_restore_stack_nonlocal(void *save_area)

	.text
	.align 16
	.global __ia64_restore_stack_nonlocal
	.proc __ia64_restore_stack_nonlocal
__ia64_restore_stack_nonlocal:
	{ .mmf
	  alloc r20 = ar.pfs, 4, 0, 0, 0
	  ld8 r12 = [in0], 8			// restore stack pointer
	  ;;
	}
	{ .mmb
	  ld8 r16=[in0], 8			// saved ar.bsp
	  mov r19 = ar.rsc			// save RSC mode bits
	  ;;
	}
	{ .mmi
	  flushrs				// make ar.bsp == ar.bspstore
	  ld8 r17 = [in0], 8			// saved ar.rnat
	  and r19 = 0x1c, r19			// RSC with mode bits cleared
	  ;;
	}
	{ .mmf
	  ld8 r18 = [in0]			// saved ar.pfs
	  mov ar.rsc = r19			// put RSE in enforced-lazy mode
	  ;;
	}
	{ .mmi
	  mov ar.bspstore = r16			// rewind backing store
	  ;;
	  mov ar.rnat = r17
	  or r19 = 0x3, r19			// original mode, to restore below
	  ;;
	}
	{ .mmf
	  loadrs				// invalidate dirty stacked regs
	  invala
	  ;;
	}
.L0:	{ .mib
	  mov ar.rsc = r19			// restore RSE mode
	  mov ar.pfs = r18
	  br.ret.sptk.few rp
	  ;;
	}
	.endp __ia64_restore_stack_nonlocal
#endif

#ifdef L__trampoline
// Implement the nested function trampoline.  This is out of line
// so that we don't have to bother with flushing the icache, as
// well as making the on-stack trampoline smaller.
//
// The trampoline has the following form:
//
//		+-------------------+ >
//	TRAMP:	| __ia64_trampoline | |
//		+-------------------+ > fake function descriptor
//		| TRAMP+16	    | |
//		+-------------------+ >
//		| target descriptor |
//		+-------------------+
//		| static link	    |
//		+-------------------+

	.text
	.align 16
	.global __ia64_trampoline
	.proc __ia64_trampoline
__ia64_trampoline:
	{ .mmi
	  // On entry r1 points at TRAMP+16 (set up by the fake descriptor):
	  // load the target function descriptor address and static link.
	  ld8 r2 = [r1], 8			// r2 = &target descriptor
	  ;;
	  ld8 r15 = [r1]			// r15 = static link
	}
	{ .mmi
	  // Unpack the target descriptor: entry point and its gp.
	  ld8 r3 = [r2], 8			// r3 = target entry point
	  ;;
	  ld8 r1 = [r2]				// r1 = target gp
	  mov b6 = r3
	}
	{ .bbb
	  br.sptk.many b6			// tail-jump to the target
	  ;;
	}
	.endp __ia64_trampoline
#endif