]> git.ipfire.org Git - thirdparty/gcc.git/blame - libgcc/config/ia64/lib1funcs.S
Update copyright years.
[thirdparty/gcc.git] / libgcc / config / ia64 / lib1funcs.S
CommitLineData
8d9254fc 1/* Copyright (C) 2000-2020 Free Software Foundation, Inc.
92b4f0af
JW
2 Contributed by James E. Wilson <wilson@cygnus.com>.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
748086b7 8 the Free Software Foundation; either version 3, or (at your option)
92b4f0af
JW
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
748086b7
JJ
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
92b4f0af 19
748086b7
JJ
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
92b4f0af 24
02befdf4 25#ifdef L__divxf3
3f622353
RH
26// Compute an 80-bit IEEE double-extended quotient.
27//
28// From the Intel IA-64 Optimization Guide, choose the minimum latency
29// alternative.
30//
31// farg0 holds the dividend. farg1 holds the divisor.
02befdf4
ZW
32//
33// __divtf3 is an alternate symbol name for backward compatibility.
// The quotient is returned in fret0.
// In the step comments below: a = farg0, b = farg1, y = the value that
// frcpa leaves in f10, f1 = constant 1.0 and f0 = constant 0.0.
3f622353
RH
34
35 .text
36 .align 16
02befdf4 37 .global __divxf3
02befdf4
ZW
38 .proc __divxf3
39__divxf3:
// The __divtf3 alias is only emitted for SHARED builds.
c252db20
L
40#ifdef SHARED
41 .global __divtf3
3f622353 42__divtf3:
c252db20 43#endif
// Preset p7 = 1; it is cleared below when frcpa sets p6, so p7 == !p6.
3392dafc
RH
44 cmp.eq p7, p0 = r0, r0
45 frcpa.s0 f10, p6 = farg0, farg1
3f622353 46 ;;
3392dafc
RH
47(p6) cmp.ne p7, p0 = r0, r0 // p6 set: clear p7
48 .pred.rel.mutex p6, p7 // tell the assembler p6 and p7 are never both set
3f622353 49(p6) fnma.s1 f11 = farg1, f10, f1 // e = 1 - b*y
f327ea3e 50(p6) fma.s1 f12 = farg0, f10, f0 // q0 = a*y
3f622353 51 ;;
f327ea3e 52(p6) fma.s1 f13 = f11, f11, f0 // e^2
3392dafc 53(p6) fma.s1 f14 = f11, f11, f11 // e^2 + e
3f622353 54 ;;
// Note: the next two instructions share a group; the first reads the old
// f13 that the second overwrites, so their order must not be swapped.
3392dafc
RH
55(p6) fma.s1 f11 = f13, f13, f11 // e^4 + e
56(p6) fma.s1 f13 = f14, f10, f10 // y1 = y*(e^2 + e) + y
3f622353 57 ;;
3392dafc 58(p6) fma.s1 f10 = f13, f11, f10 // y2 = y1*(e^4 + e) + y
f327ea3e 59(p6) fnma.s1 f11 = farg1, f12, farg0 // r0 = a - b*q0
3f622353 60 ;;
3392dafc 61(p6) fma.s1 f11 = f11, f10, f12 // q1 = r0*y2 + q0
f327ea3e 62(p6) fnma.s1 f12 = farg1, f10, f1 // e1 = 1 - b*y2
3f622353 63 ;;
3392dafc
RH
64(p6) fma.s1 f10 = f12, f10, f10 // y3 = e1*y2 + y2
65(p6) fnma.s1 f12 = farg1, f11, farg0 // r1 = a - b*q1
3f622353 66 ;;
// Final correction uses status field s0 (earlier steps use s1).
f327ea3e 67(p6) fma.s0 fret0 = f12, f10, f11 // result = r1*y3 + q1
// p7 path: return the value frcpa itself produced in f10.
3392dafc 68(p7) mov fret0 = f10
3f622353 69 br.ret.sptk rp
02befdf4 70 .endp __divxf3
3f622353
RH
71#endif
72
a2497896 73#ifdef L__divdf3
c65ebc55
JW
74// Compute a 64-bit IEEE double quotient.
75//
76// From the Intel IA-64 Optimization Guide, choose the minimum latency
77// alternative.
78//
79// farg0 holds the dividend. farg1 holds the divisor.
// The quotient is returned in fret0.
// In the step comments below: a = farg0, b = farg1, y = the value that
// frcpa leaves in f10, and f1 = constant 1.0.
80
81 .text
82 .align 16
83 .global __divdf3
84 .proc __divdf3
85__divdf3:
// Preset p7 = 1; it is cleared below when frcpa sets p6, so p7 == !p6.
3392dafc
RH
86 cmp.eq p7, p0 = r0, r0
87 frcpa.s0 f10, p6 = farg0, farg1
c65ebc55 88 ;;
3392dafc
RH
89(p6) cmp.ne p7, p0 = r0, r0 // p6 set: clear p7
90 .pred.rel.mutex p6, p7 // tell the assembler p6 and p7 are never both set
91(p6) fmpy.s1 f11 = farg0, f10 // q0 = a*y
c65ebc55
JW
92(p6) fnma.s1 f12 = farg1, f10, f1 // e = 1 - b*y
93 ;;
94(p6) fma.s1 f11 = f12, f11, f11 // q1 = e*q0 + q0
3392dafc 95(p6) fmpy.s1 f13 = f12, f12 // e^2
c65ebc55 96 ;;
3392dafc 97(p6) fma.s1 f10 = f12, f10, f10 // y1 = e*y + y
c65ebc55 98(p6) fma.s1 f11 = f13, f11, f11 // q2 = e^2*q1 + q1
3392dafc
RH
99 ;;
100(p6) fmpy.s1 f12 = f13, f13 // e^4
c65ebc55
JW
101(p6) fma.s1 f10 = f13, f10, f10 // y2 = e^2*y1 + y1
102 ;;
103(p6) fma.d.s1 f11 = f12, f11, f11 // q3 = e^4*q2 + q2, double precision
104(p6) fma.s1 f10 = f12, f10, f10 // y3 = e^4*y2 + y2
105 ;;
106(p6) fnma.d.s1 f8 = farg1, f11, farg0 // r = a - b*q3
107 ;;
// Final correction; no status-field suffix is given here, unlike the
// .s1 steps above.
3392dafc
RH
108(p6) fma.d fret0 = f8, f10, f11 // result = r*y3 + q3
// p7 path: return the value frcpa itself produced in f10.
109(p7) mov fret0 = f10
c65ebc55
JW
110 br.ret.sptk rp
111 ;;
112 .endp __divdf3
113#endif
114
a2497896 115#ifdef L__divsf3
c65ebc55
JW
116// Compute a 32-bit IEEE float quotient.
117//
118// From the Intel IA-64 Optimization Guide, choose the minimum latency
119// alternative.
120//
121// farg0 holds the dividend. farg1 holds the divisor.
// The quotient is returned in fret0.
// In the step comments below: a = farg0, b = farg1, y = the value that
// frcpa leaves in f10, and f1 = constant 1.0.
122
123 .text
124 .align 16
125 .global __divsf3
126 .proc __divsf3
127__divsf3:
// Preset p7 = 1; it is cleared below when frcpa sets p6, so p7 == !p6.
938566fb 128 cmp.eq p7, p0 = r0, r0
3392dafc 129 frcpa.s0 f10, p6 = farg0, farg1
c65ebc55 130 ;;
938566fb
RH
131(p6) cmp.ne p7, p0 = r0, r0 // p6 set: clear p7
132 .pred.rel.mutex p6, p7 // tell the assembler p6 and p7 are never both set
3392dafc 133(p6) fmpy.s1 f8 = farg0, f10 // q0 = a*y
c65ebc55
JW
134(p6) fnma.s1 f9 = farg1, f10, f1 // e = 1 - b*y
135 ;;
136(p6) fma.s1 f8 = f9, f8, f8 // q1 = e*q0 + q0
3392dafc 137(p6) fmpy.s1 f9 = f9, f9 // e^2
c65ebc55
JW
138 ;;
139(p6) fma.s1 f8 = f9, f8, f8 // q2 = e^2*q1 + q1
3392dafc 140(p6) fmpy.s1 f9 = f9, f9 // e^4
c65ebc55 141 ;;
3392dafc 142(p6) fma.d.s1 f10 = f9, f8, f8 // q3 = e^4*q2 + q2, double precision
c65ebc55 143 ;;
// p6 path: fnorm.s produces the single-precision result from q3 under
// status field s0.
// p7 path: return the value frcpa itself produced in f10.
938566fb
RH
144(p6) fnorm.s.s0 fret0 = f10
145(p7) mov fret0 = f10
c65ebc55
JW
146 br.ret.sptk rp
147 ;;
148 .endp __divsf3
149#endif
150
a2497896 151#ifdef L__divdi3
c65ebc55
JW
152// Compute a 64-bit integer quotient.
153//
d8d7a286
RH
154// From the Intel IA-64 Optimization Guide, choose the minimum latency
155// alternative.
c65ebc55 156//
d8d7a286 157// in0 holds the dividend. in1 holds the divisor.
// The quotient is returned in ret0.  A zero divisor executes "break 1".
// In the step comments below: a and b are the converted dividend and
// divisor (f8, f9), y is the frcpa result in f10, f1 = constant 1.0.
c65ebc55
JW
158
159 .text
160 .align 16
161 .global __divdi3
162 .proc __divdi3
163__divdi3:
164 .regstk 2,0,0,0
165 // Transfer inputs to FP registers.
166 setf.sig f8 = in0
167 setf.sig f9 = in1
eea1d14a
L
168 // Check divide by zero.
// p7 receives the complement of (0 != in1), i.e. p7 = (in1 == 0).
169 cmp.ne.unc p0,p7=0,in1
c65ebc55
JW
170 ;;
171 // Convert the inputs to FP, so that they won't be treated as unsigned.
172 fcvt.xf f8 = f8
173 fcvt.xf f9 = f9
eea1d14a 174(p7) break 1
c65ebc55
JW
175 ;;
176 // Compute the reciprocal approximation.
660a0ebd 177 frcpa.s1 f10, p6 = f8, f9
2a7ffc85 178 ;;
c65ebc55 179 // 3 Newton-Raphson iterations.
d8d7a286
RH
180(p6) fnma.s1 f11 = f9, f10, f1 // e = 1 - b*y
181(p6) fmpy.s1 f12 = f8, f10 // q0 = a*y
c65ebc55 182 ;;
d8d7a286
RH
183(p6) fmpy.s1 f13 = f11, f11 // e^2
184(p6) fma.s1 f12 = f11, f12, f12 // q1 = e*q0 + q0
c65ebc55 185 ;;
// Note: the first instruction below reads the old f11 that the second one
// overwrites in the same group, so their order must not be swapped.
d8d7a286
RH
186(p6) fma.s1 f10 = f11, f10, f10 // y1 = e*y + y
187(p6) fma.s1 f11 = f13, f12, f12 // q2 = e^2*q1 + q1
c65ebc55 188 ;;
d8d7a286
RH
189(p6) fma.s1 f10 = f13, f10, f10 // y2 = e^2*y1 + y1
190(p6) fnma.s1 f12 = f9, f11, f8 // r = a - b*q2
c65ebc55 191 ;;
d8d7a286 192(p6) fma.s1 f10 = f12, f10, f11 // q = r*y2 + q2
c65ebc55
JW
193 ;;
194 // Round quotient to an integer.
// Not predicated: when p6 is clear this converts the value frcpa left in f10.
d8d7a286 195 fcvt.fx.trunc.s1 f10 = f10
c65ebc55
JW
196 ;;
197 // Transfer result to GP registers.
d8d7a286 198 getf.sig ret0 = f10
c65ebc55
JW
199 br.ret.sptk rp
200 ;;
201 .endp __divdi3
202#endif
203
a2497896 204#ifdef L__moddi3
c65ebc55
JW
205// Compute a 64-bit integer modulus.
206//
d8d7a286
RH
207// From the Intel IA-64 Optimization Guide, choose the minimum latency
208// alternative.
c65ebc55 209//
d8d7a286 210// in0 holds the dividend (a). in1 holds the divisor (b).
// The remainder is returned in ret0.  A zero divisor executes "break 1".
// The quotient q is computed in floating point as in the division routine,
// then the remainder is formed with an integer multiply-add: a + q*(-b).
// f14 keeps the original integer dividend for that last step.
c65ebc55
JW
211
212 .text
213 .align 16
214 .global __moddi3
215 .proc __moddi3
216__moddi3:
217 .regstk 2,0,0,0
218 // Transfer inputs to FP registers.
d8d7a286 219 setf.sig f14 = in0
c65ebc55 220 setf.sig f9 = in1
eea1d14a
L
221 // Check divide by zero.
// p7 receives the complement of (0 != in1), i.e. p7 = (in1 == 0).
222 cmp.ne.unc p0,p7=0,in1
c65ebc55
JW
223 ;;
224 // Convert the inputs to FP, so that they won't be treated as unsigned.
d8d7a286 225 fcvt.xf f8 = f14
c65ebc55 226 fcvt.xf f9 = f9
eea1d14a 227(p7) break 1
c65ebc55
JW
228 ;;
229 // Compute the reciprocal approximation.
660a0ebd 230 frcpa.s1 f10, p6 = f8, f9
c65ebc55
JW
231 ;;
232 // 3 Newton-Raphson iterations.
d8d7a286
RH
233(p6) fmpy.s1 f12 = f8, f10 // q0 = a*y
234(p6) fnma.s1 f11 = f9, f10, f1 // e = 1 - b*y
c65ebc55 235 ;;
d8d7a286
RH
236(p6) fma.s1 f12 = f11, f12, f12 // q1 = e*q0 + q0
237(p6) fmpy.s1 f13 = f11, f11 // e^2
c65ebc55 238 ;;
// Note: the first instruction below reads the old f11 that the second one
// overwrites in the same group, so their order must not be swapped.
d8d7a286
RH
239(p6) fma.s1 f10 = f11, f10, f10 // y1 = e*y + y
240(p6) fma.s1 f11 = f13, f12, f12 // q2 = e^2*q1 + q1
c65ebc55 241 ;;
d8d7a286
RH
242 sub in1 = r0, in1 // in1 = -b, for the final multiply-add
243(p6) fma.s1 f10 = f13, f10, f10 // y2 = e^2*y1 + y1
c65ebc55
JW
244(p6) fnma.s1 f12 = f9, f11, f8 // r = a - b*q2
245 ;;
// f9 (FP divisor) was last read in the previous group; reuse it for the
// integer value -b.
d8d7a286 246 setf.sig f9 = in1
660a0ebd 247(p6) fma.s1 f10 = f12, f10, f11 // q = r*y2 + q2
c65ebc55 248 ;;
// Truncate the quotient to an integer (not predicated on p6).
660a0ebd 249 fcvt.fx.trunc.s1 f10 = f10
c65ebc55 250 ;;
d8d7a286
RH
251 // r = q * (-b) + a
252 xma.l f10 = f10, f9, f14
c65ebc55
JW
253 ;;
254 // Transfer result to GP registers.
d8d7a286 255 getf.sig ret0 = f10
c65ebc55
JW
256 br.ret.sptk rp
257 ;;
258 .endp __moddi3
259#endif
260
a2497896 261#ifdef L__udivdi3
c65ebc55
JW
262// Compute a 64-bit unsigned integer quotient.
263//
d8d7a286
RH
264// From the Intel IA-64 Optimization Guide, choose the minimum latency
265// alternative.
c65ebc55 266//
d8d7a286 267// in0 holds the dividend. in1 holds the divisor.
// The quotient is returned in ret0.  A zero divisor executes "break 1".
// In the step comments below: a and b are the converted dividend and
// divisor (f8, f9), y is the frcpa result in f10, f1 = constant 1.0.
c65ebc55
JW
268
269 .text
270 .align 16
271 .global __udivdi3
272 .proc __udivdi3
273__udivdi3:
274 .regstk 2,0,0,0
275 // Transfer inputs to FP registers.
276 setf.sig f8 = in0
277 setf.sig f9 = in1
eea1d14a
L
278 // Check divide by zero.
// p7 receives the complement of (0 != in1), i.e. p7 = (in1 == 0).
279 cmp.ne.unc p0,p7=0,in1
c65ebc55
JW
280 ;;
281 // Convert the inputs to FP, to avoid FP software-assist faults.
// The unsigned conversion form (fcvt.xuf) is used here.
660a0ebd
JW
282 fcvt.xuf.s1 f8 = f8
283 fcvt.xuf.s1 f9 = f9
eea1d14a 284(p7) break 1
c65ebc55
JW
285 ;;
286 // Compute the reciprocal approximation.
660a0ebd 287 frcpa.s1 f10, p6 = f8, f9
c65ebc55
JW
288 ;;
289 // 3 Newton-Raphson iterations.
d8d7a286
RH
290(p6) fnma.s1 f11 = f9, f10, f1 // e = 1 - b*y
291(p6) fmpy.s1 f12 = f8, f10 // q0 = a*y
c65ebc55 292 ;;
d8d7a286
RH
293(p6) fmpy.s1 f13 = f11, f11 // e^2
294(p6) fma.s1 f12 = f11, f12, f12 // q1 = e*q0 + q0
c65ebc55 295 ;;
// Note: the first instruction below reads the old f11 that the second one
// overwrites in the same group, so their order must not be swapped.
d8d7a286
RH
296(p6) fma.s1 f10 = f11, f10, f10 // y1 = e*y + y
297(p6) fma.s1 f11 = f13, f12, f12 // q2 = e^2*q1 + q1
c65ebc55 298 ;;
d8d7a286
RH
299(p6) fma.s1 f10 = f13, f10, f10 // y2 = e^2*y1 + y1
300(p6) fnma.s1 f12 = f9, f11, f8 // r = a - b*q2
c65ebc55 301 ;;
2a7ffc85 302(p6) fma.s1 f10 = f12, f10, f11 // q = r*y2 + q2
c65ebc55
JW
303 ;;
304 // Round quotient to an unsigned integer.
d8d7a286 305 fcvt.fxu.trunc.s1 f10 = f10
c65ebc55
JW
306 ;;
307 // Transfer result to GP registers.
d8d7a286 308 getf.sig ret0 = f10
c65ebc55
JW
309 br.ret.sptk rp
310 ;;
311 .endp __udivdi3
312#endif
313
a2497896 314#ifdef L__umoddi3
c65ebc55
JW
315// Compute a 64-bit unsigned integer modulus.
316//
d8d7a286
RH
317// From the Intel IA-64 Optimization Guide, choose the minimum latency
318// alternative.
c65ebc55 319//
d8d7a286 320// in0 holds the dividend (a). in1 holds the divisor (b).
// The remainder is returned in ret0.  A zero divisor executes "break 1".
// The quotient q is computed in floating point, then the remainder is
// formed with an integer multiply-add: a + q*(-b).  f14 keeps the original
// integer dividend for that last step.
c65ebc55
JW
321
322 .text
323 .align 16
324 .global __umoddi3
325 .proc __umoddi3
326__umoddi3:
327 .regstk 2,0,0,0
328 // Transfer inputs to FP registers.
d8d7a286 329 setf.sig f14 = in0
c65ebc55 330 setf.sig f9 = in1
eea1d14a
L
331 // Check divide by zero.
// p7 receives the complement of (0 != in1), i.e. p7 = (in1 == 0).
332 cmp.ne.unc p0,p7=0,in1
c65ebc55
JW
333 ;;
334 // Convert the inputs to FP, to avoid FP software assist faults.
d8d7a286 335 fcvt.xuf.s1 f8 = f14
660a0ebd 336 fcvt.xuf.s1 f9 = f9
eea1d14a 337(p7) break 1
c65ebc55
JW
338 ;;
339 // Compute the reciprocal approximation.
660a0ebd 340 frcpa.s1 f10, p6 = f8, f9
c65ebc55
JW
341 ;;
342 // 3 Newton-Raphson iterations.
d8d7a286
RH
343(p6) fmpy.s1 f12 = f8, f10 // q0 = a*y
344(p6) fnma.s1 f11 = f9, f10, f1 // e = 1 - b*y
c65ebc55 345 ;;
d8d7a286
RH
346(p6) fma.s1 f12 = f11, f12, f12 // q1 = e*q0 + q0
347(p6) fmpy.s1 f13 = f11, f11 // e^2
c65ebc55 348 ;;
// Note: the first instruction below reads the old f11 that the second one
// overwrites in the same group, so their order must not be swapped.
d8d7a286
RH
349(p6) fma.s1 f10 = f11, f10, f10 // y1 = e*y + y
350(p6) fma.s1 f11 = f13, f12, f12 // q2 = e^2*q1 + q1
c65ebc55 351 ;;
d8d7a286
RH
352 sub in1 = r0, in1 // in1 = -b, for the final multiply-add
353(p6) fma.s1 f10 = f13, f10, f10 // y2 = e^2*y1 + y1
c65ebc55
JW
354(p6) fnma.s1 f12 = f9, f11, f8 // r = a - b*q2
355 ;;
// f9 (FP divisor) was last read in the previous group; reuse it for the
// integer value -b.
d8d7a286 356 setf.sig f9 = in1
660a0ebd 357(p6) fma.s1 f10 = f12, f10, f11 // q = r*y2 + q2
c65ebc55
JW
358 ;;
359 // Round quotient to an unsigned integer.
660a0ebd 360 fcvt.fxu.trunc.s1 f10 = f10
c65ebc55 361 ;;
d8d7a286
RH
362 // r = q * (-b) + a
363 xma.l f10 = f10, f9, f14
c65ebc55
JW
364 ;;
365 // Transfer result to GP registers.
d8d7a286 366 getf.sig ret0 = f10
c65ebc55
JW
367 br.ret.sptk rp
368 ;;
369 .endp __umoddi3
370#endif
371
a2497896 372#ifdef L__divsi3
c65ebc55
JW
373// Compute a 32-bit integer quotient.
374//
d8d7a286
RH
375// From the Intel IA-64 Optimization Guide, choose the minimum latency
376// alternative.
c65ebc55 377//
d8d7a286 378// in0 holds the dividend. in1 holds the divisor.
// The quotient is returned in ret0.  A zero divisor executes "break 1".
// In the step comments below: a and b are the converted dividend and
// divisor (f8, f9), y is the frcpa result in f10, f1 = constant 1.0.
c65ebc55
JW
379
380 .text
381 .align 16
382 .global __divsi3
383 .proc __divsi3
384__divsi3:
385 .regstk 2,0,0,0
eea1d14a
L
386 // Check divide by zero.
// Only the low 32 bits of in1 are the divisor (they are sign-extended by
// the sxt4 in this same group), so test just those bits with cmp4.  A
// 64-bit compare here could miss a zero divisor if the upper half of the
// incoming register is not already an extension of the low half.
// p7 = (low 32 bits of in1 == 0).
387 cmp4.ne.unc p0,p7=0,in1
d8d7a286
RH
388 sxt4 in0 = in0
389 sxt4 in1 = in1
390 ;;
c65ebc55
JW
391 setf.sig f8 = in0
392 setf.sig f9 = in1
eea1d14a 393(p7) break 1
c65ebc55 394 ;;
// r2 holds the biased exponent used below to build the constant 2^-34
// (0xffdd is 34 below the register-format bias 0xffff).
d8d7a286 395 mov r2 = 0x0ffdd
c65ebc55
JW
396 fcvt.xf f8 = f8
397 fcvt.xf f9 = f9
398 ;;
d8d7a286 399 setf.exp f11 = r2 // f11 = 2^-34
4287b5f1 400 frcpa.s1 f10, p6 = f8, f9
c65ebc55 401 ;;
d8d7a286
RH
402(p6) fmpy.s1 f8 = f8, f10 // q0 = a*y
403(p6) fnma.s1 f9 = f9, f10, f1 // e = 1 - b*y
c65ebc55 404 ;;
d8d7a286
RH
405(p6) fma.s1 f8 = f9, f8, f8 // q1 = e*q0 + q0
406(p6) fma.s1 f9 = f9, f9, f11 // e^2 + 2^-34
c65ebc55 407 ;;
d8d7a286 408(p6) fma.s1 f10 = f9, f8, f8 // q = (e^2 + 2^-34)*q1 + q1
c65ebc55 409 ;;
// Truncate the quotient to an integer (not predicated on p6).
d8d7a286 410 fcvt.fx.trunc.s1 f10 = f10
c65ebc55 411 ;;
d8d7a286 412 getf.sig ret0 = f10
c65ebc55
JW
413 br.ret.sptk rp
414 ;;
415 .endp __divsi3
416#endif
417
a2497896 418#ifdef L__modsi3
c65ebc55
JW
419// Compute a 32-bit integer modulus.
420//
d8d7a286
RH
421// From the Intel IA-64 Optimization Guide, choose the minimum latency
422// alternative.
c65ebc55 423//
d8d7a286 424// in0 holds the dividend. in1 holds the divisor.
// The remainder is returned in ret0.  A zero divisor executes "break 1".
// The quotient q is computed in floating point, then the remainder is
// formed with an integer multiply-add: a + q*(-b).  f13 keeps the
// sign-extended integer dividend for that last step.
c65ebc55
JW
425
426 .text
427 .align 16
428 .global __modsi3
429 .proc __modsi3
430__modsi3:
431 .regstk 2,0,0,0
// r2 holds the biased exponent used below to build the constant 2^-34
// (0xffdd is 34 below the register-format bias 0xffff).
d8d7a286
RH
432 mov r2 = 0x0ffdd
433 sxt4 in0 = in0
434 sxt4 in1 = in1
435 ;;
// r32/r33 are the same registers as in0/in1 here.
436 setf.sig f13 = r32
c65ebc55 437 setf.sig f9 = r33
eea1d14a
L
438 // Check divide by zero.
// Done after the sign extension above; p7 = (in1 == 0).
439 cmp.ne.unc p0,p7=0,in1
c65ebc55 440 ;;
d8d7a286
RH
441 sub in1 = r0, in1 // in1 = -b, for the final multiply-add
442 fcvt.xf f8 = f13
c65ebc55
JW
443 fcvt.xf f9 = f9
444 ;;
d8d7a286 445 setf.exp f11 = r2 // f11 = 2^-34
4287b5f1 446 frcpa.s1 f10, p6 = f8, f9
eea1d14a 447(p7) break 1
c65ebc55 448 ;;
d8d7a286
RH
449(p6) fmpy.s1 f12 = f8, f10 // q0 = a*y
450(p6) fnma.s1 f10 = f9, f10, f1 // e = 1 - b*y
c65ebc55 451 ;;
// f9 (FP divisor) was last read in the previous group; reuse it for the
// integer value -b.
d8d7a286
RH
452 setf.sig f9 = in1
453(p6) fma.s1 f12 = f10, f12, f12 // q1 = e*q0 + q0
454(p6) fma.s1 f10 = f10, f10, f11 // e^2 + 2^-34
c65ebc55 455 ;;
d8d7a286 456(p6) fma.s1 f10 = f10, f12, f12 // q = (e^2 + 2^-34)*q1 + q1
c65ebc55 457 ;;
// Truncate the quotient to an integer (not predicated on p6).
d8d7a286 458 fcvt.fx.trunc.s1 f10 = f10
c65ebc55 459 ;;
// remainder = q * (-b) + a
d8d7a286 460 xma.l f10 = f10, f9, f13
c65ebc55 461 ;;
d8d7a286 462 getf.sig ret0 = f10
c65ebc55
JW
463 br.ret.sptk rp
464 ;;
465 .endp __modsi3
466#endif
467
a2497896 468#ifdef L__udivsi3
c65ebc55
JW
469// Compute a 32-bit unsigned integer quotient.
470//
d8d7a286
RH
471// From the Intel IA-64 Optimization Guide, choose the minimum latency
472// alternative.
c65ebc55 473//
d8d7a286 474// in0 holds the dividend. in1 holds the divisor.
// The quotient is returned in ret0.  A zero divisor executes "break 1".
// In the step comments below: a and b are the converted dividend and
// divisor (f8, f9), y is the frcpa result in f10, f1 = constant 1.0.
c65ebc55
JW
475
476 .text
477 .align 16
478 .global __udivsi3
479 .proc __udivsi3
480__udivsi3:
481 .regstk 2,0,0,0
// r2 holds the biased exponent used below to build the constant 2^-34
// (0xffdd is 34 below the register-format bias 0xffff).
d8d7a286
RH
482 mov r2 = 0x0ffdd
483 zxt4 in0 = in0
484 zxt4 in1 = in1
c65ebc55 485 ;;
d8d7a286
RH
486 setf.sig f8 = in0
487 setf.sig f9 = in1
eea1d14a
L
488 // Check divide by zero.
// Done after the zero extension above; p7 = (in1 == 0).
489 cmp.ne.unc p0,p7=0,in1
c65ebc55 490 ;;
// The signed conversion is used: after zxt4 both values are non-negative
// as 64-bit integers.
4287b5f1
RH
491 fcvt.xf f8 = f8
492 fcvt.xf f9 = f9
eea1d14a 493(p7) break 1
4287b5f1 494 ;;
d8d7a286 495 setf.exp f11 = r2 // f11 = 2^-34
4287b5f1 496 frcpa.s1 f10, p6 = f8, f9
c65ebc55 497 ;;
d8d7a286
RH
498(p6) fmpy.s1 f8 = f8, f10 // q0 = a*y
499(p6) fnma.s1 f9 = f9, f10, f1 // e = 1 - b*y
c65ebc55 500 ;;
d8d7a286
RH
501(p6) fma.s1 f8 = f9, f8, f8 // q1 = e*q0 + q0
502(p6) fma.s1 f9 = f9, f9, f11 // e^2 + 2^-34
c65ebc55 503 ;;
d8d7a286 504(p6) fma.s1 f10 = f9, f8, f8 // q = (e^2 + 2^-34)*q1 + q1
c65ebc55 505 ;;
// Truncate the quotient to an unsigned integer (not predicated on p6).
d8d7a286 506 fcvt.fxu.trunc.s1 f10 = f10
c65ebc55 507 ;;
d8d7a286 508 getf.sig ret0 = f10
c65ebc55
JW
509 br.ret.sptk rp
510 ;;
511 .endp __udivsi3
512#endif
513
a2497896 514#ifdef L__umodsi3
c65ebc55
JW
515// Compute a 32-bit unsigned integer modulus.
516//
d8d7a286
RH
517// From the Intel IA-64 Optimization Guide, choose the minimum latency
518// alternative.
c65ebc55 519//
d8d7a286 520// in0 holds the dividend. in1 holds the divisor.
// The remainder is returned in ret0.  A zero divisor executes "break 1".
// The quotient q is computed in floating point, then the remainder is
// formed with an integer multiply-add: a + q*(-b).  f13 keeps the
// zero-extended integer dividend for that last step.
c65ebc55
JW
521
522 .text
523 .align 16
524 .global __umodsi3
525 .proc __umodsi3
526__umodsi3:
527 .regstk 2,0,0,0
// r2 holds the biased exponent used below to build the constant 2^-34
// (0xffdd is 34 below the register-format bias 0xffff).
d8d7a286
RH
528 mov r2 = 0x0ffdd
529 zxt4 in0 = in0
530 zxt4 in1 = in1
c65ebc55 531 ;;
d8d7a286
RH
532 setf.sig f13 = in0
533 setf.sig f9 = in1
eea1d14a
L
534 // Check divide by zero.
// Done after the zero extension above; p7 = (in1 == 0).
535 cmp.ne.unc p0,p7=0,in1
c65ebc55 536 ;;
d8d7a286
RH
537 sub in1 = r0, in1 // in1 = -b, for the final multiply-add
538 fcvt.xf f8 = f13
539 fcvt.xf f9 = f9
c65ebc55 540 ;;
d8d7a286 541 setf.exp f11 = r2 // f11 = 2^-34
4287b5f1 542 frcpa.s1 f10, p6 = f8, f9
eea1d14a 543(p7) break 1
c65ebc55 544 ;;
d8d7a286
RH
545(p6) fmpy.s1 f12 = f8, f10 // q0 = a*y
546(p6) fnma.s1 f10 = f9, f10, f1 // e = 1 - b*y
c65ebc55 547 ;;
// f9 (FP divisor) was last read in the previous group; reuse it for the
// integer value -b.
2a7ffc85 548 setf.sig f9 = in1
d8d7a286
RH
549(p6) fma.s1 f12 = f10, f12, f12 // q1 = e*q0 + q0
550(p6) fma.s1 f10 = f10, f10, f11 // e^2 + 2^-34
c65ebc55 551 ;;
d8d7a286 552(p6) fma.s1 f10 = f10, f12, f12 // q = (e^2 + 2^-34)*q1 + q1
c65ebc55 553 ;;
// Truncate the quotient to an unsigned integer (not predicated on p6).
d8d7a286 554 fcvt.fxu.trunc.s1 f10 = f10
c65ebc55 555 ;;
// remainder = q * (-b) + a
d8d7a286 556 xma.l f10 = f10, f9, f13
c65ebc55 557 ;;
d8d7a286 558 getf.sig ret0 = f10
c65ebc55
JW
559 br.ret.sptk rp
560 ;;
561 .endp __umodsi3
562#endif
563
a2497896 564#ifdef L__save_stack_nonlocal
c65ebc55
JW
565// Notes on save/restore stack nonlocal: We read ar.bsp but write
566// ar.bspstore. This is because ar.bsp can be read at all times
567// (independent of the RSE mode) but since it's read-only we need to
568// restore the value via ar.bspstore. This is OK because
569// ar.bsp==ar.bspstore after executing "flushrs".
570
571// void __ia64_save_stack_nonlocal(void *save_area, void *stack_pointer)
//
// Layout written to save_area (four 8-byte words):
//   +0   stack_pointer (in1)
//   +8   ar.bsp
//   +16  ar.rnat
//   +24  ar.pfs as read by the alloc below
572
573 .text
574 .align 16
575 .global __ia64_save_stack_nonlocal
576 .proc __ia64_save_stack_nonlocal
577__ia64_save_stack_nonlocal:
97e242b0
RH
578 { .mmf
579 alloc r18 = ar.pfs, 2, 0, 0, 0 // r18 = ar.pfs
580 mov r19 = ar.rsc
581 ;;
582 }
583 { .mmi
584 flushrs
585 st8 [in0] = in1, 24 // save_area[0] = stack_pointer; in0 -> +24
586 and r19 = 0x1c, r19 // keep only bits 2..4 of ar.rsc (low two bits become 0)
587 ;;
588 }
589 { .mmi
590 st8 [in0] = r18, -16 // save_area[24] = ar.pfs; in0 -> +8
591 mov ar.rsc = r19 // install the masked value before reading bsp/rnat
592 or r19 = 0x3, r19 // value to install on exit: low two bits set
593 ;;
594 }
595 { .mmi
596 mov r16 = ar.bsp
597 mov r17 = ar.rnat
598 adds r2 = 8, in0 // r2 -> save_area + 16
599 ;;
600 }
601 { .mmi
602 st8 [in0] = r16 // save_area[8] = ar.bsp
603 st8 [r2] = r17 // save_area[16] = ar.rnat
604 }
605 { .mib
606 mov ar.rsc = r19 // ar.rsc = (original & 0x1c) | 0x3
607 br.ret.sptk.few rp
608 ;;
609 }
c65ebc55
JW
610 .endp __ia64_save_stack_nonlocal
611#endif
612
a2497896 613#ifdef L__nonlocal_goto
97e242b0 614// void __ia64_nonlocal_goto(void *target_label, void *save_area,
c65ebc55
JW
615// void *static_chain);
//
// Reads four 8-byte words from save_area, in order: the value for r12,
// ar.bspstore, ar.rnat and ar.pfs.  It then returns to target_label with
// static_chain in r15.
616
617 .text
618 .align 16
619 .global __ia64_nonlocal_goto
620 .proc __ia64_nonlocal_goto
621__ia64_nonlocal_goto:
97e242b0
RH
622 { .mmi
623 alloc r20 = ar.pfs, 3, 0, 0, 0
624 ld8 r12 = [in1], 8 // r12 = save_area[0]
625 mov.ret.sptk rp = in0, .L0 // rp = target_label; hint names the return at .L0
626 ;;
627 }
628 { .mmf
629 ld8 r16 = [in1], 8 // r16 = save_area[8], written to ar.bspstore below
630 mov r19 = ar.rsc
631 ;;
632 }
633 { .mmi
634 flushrs
635 ld8 r17 = [in1], 8 // r17 = save_area[16], written to ar.rnat below
636 and r19 = 0x1c, r19 // keep only bits 2..4 of ar.rsc (low two bits become 0)
637 ;;
638 }
639 { .mmi
640 ld8 r18 = [in1] // r18 = save_area[24], written to ar.pfs below
641 mov ar.rsc = r19 // install the masked value before touching bspstore/rnat
642 or r19 = 0x3, r19 // value to install on exit: low two bits set
643 ;;
644 }
645 { .mmi
646 mov ar.bspstore = r16
647 ;;
648 mov ar.rnat = r17
649 ;;
650 }
651 { .mmi
652 loadrs
653 invala
654 mov r15 = in2 // r15 = static_chain
655 ;;
656 }
657.L0: { .mib
658 mov ar.rsc = r19 // ar.rsc = (original & 0x1c) | 0x3
659 mov ar.pfs = r18
660 br.ret.sptk.few rp // "return" to target_label
661 ;;
c65ebc55 662 }
c65ebc55
JW
663 .endp __ia64_nonlocal_goto
664#endif
9525c690 665
a2497896 666#ifdef L__restore_stack_nonlocal
9525c690
JW
667// This is mostly the same as nonlocal_goto above.
668// ??? This has not been tested yet.
669
670// void __ia64_restore_stack_nonlocal(void *save_area)
//
// Reads four 8-byte words from save_area, in order: the value for r12,
// ar.bspstore, ar.rnat and ar.pfs, then returns to the caller through rp.
// NOTE(review): the alloc below declares 4 input registers although the
// prototype above lists a single argument -- confirm this is intended.
// NOTE(review): the .L0 label is not referenced anywhere in this routine.
671
672 .text
673 .align 16
674 .global __ia64_restore_stack_nonlocal
675 .proc __ia64_restore_stack_nonlocal
676__ia64_restore_stack_nonlocal:
97e242b0
RH
677 { .mmf
678 alloc r20 = ar.pfs, 4, 0, 0, 0
679 ld8 r12 = [in0], 8 // r12 = save_area[0]
680 ;;
681 }
682 { .mmb
683 ld8 r16=[in0], 8 // r16 = save_area[8], written to ar.bspstore below
684 mov r19 = ar.rsc
685 ;;
686 }
687 { .mmi
688 flushrs
689 ld8 r17 = [in0], 8 // r17 = save_area[16], written to ar.rnat below
690 and r19 = 0x1c, r19 // keep only bits 2..4 of ar.rsc (low two bits become 0)
691 ;;
692 }
693 { .mmf
694 ld8 r18 = [in0] // r18 = save_area[24], written to ar.pfs below
695 mov ar.rsc = r19 // install the masked value before touching bspstore/rnat
696 ;;
697 }
698 { .mmi
699 mov ar.bspstore = r16
700 ;;
701 mov ar.rnat = r17
702 or r19 = 0x3, r19 // value to install on exit: low two bits set
703 ;;
704 }
705 { .mmf
706 loadrs
707 invala
708 ;;
709 }
710.L0: { .mib
711 mov ar.rsc = r19 // ar.rsc = (original & 0x1c) | 0x3
712 mov ar.pfs = r18
713 br.ret.sptk.few rp
714 ;;
9525c690 715 }
9525c690
JW
716 .endp __ia64_restore_stack_nonlocal
717#endif
97e242b0 718
a2497896 719#ifdef L__trampoline
97e242b0
RH
720// Implement the nested function trampoline. This is out of line
721// so that we don't have to bother with flushing the icache, as
722// well as making the on-stack trampoline smaller.
723//
724// The trampoline has the following form:
725//
0024a804 726// +-------------------+ >
97e242b0
RH
727// TRAMP: | __ia64_trampoline | |
728// +-------------------+ > fake function descriptor
729// | TRAMP+16 | |
0024a804 730// +-------------------+ >
97e242b0
RH
731// | target descriptor |
732// +-------------------+
733// | static link |
734// +-------------------+
//
// On entry r1 is expected to hold the second word of the fake descriptor,
// i.e. TRAMP+16 (per the diagram above).  The code loads the two words
// found there, then the two words of the target descriptor, and branches
// to the target.
735
736 .text
737 .align 16
738 .global __ia64_trampoline
739 .proc __ia64_trampoline
740__ia64_trampoline:
741 { .mmi
742 ld8 r2 = [r1], 8 // r2 = address of target descriptor; r1 -> static link slot
743 ;;
744 ld8 r15 = [r1] // r15 = static link
745 }
746 { .mmi
747 ld8 r3 = [r2], 8 // r3 = first word of target descriptor (branch address)
748 ;;
749 ld8 r1 = [r2] // r1 = second word of target descriptor
750 mov b6 = r3
751 }
752 { .bbb
753 br.sptk.many b6 // jump to the target; rp is left untouched
754 ;;
755 }
756 .endp __ia64_trampoline
757#endif
02befdf4 758
c252db20 759#ifdef SHARED
02befdf4 760// Thunks for backward compatibility.
4e9db8b2 761#ifdef L_fixtfdi
// __fixtfti: backward-compatibility entry point that simply branches to
// __fixxfti.  It is a plain branch, not a call, so __fixxfti returns
// directly to the original caller.
02befdf4
ZW
762 .text
763 .align 16
764 .global __fixtfti
765 .proc __fixtfti
766__fixtfti:
767 { .bbb
768 br.sptk.many __fixxfti
769 ;;
770 }
771 .endp __fixtfti
4e9db8b2
SE
772#endif
773#ifdef L_fixunstfdi
// __fixunstfti: backward-compatibility entry point that simply branches to
// __fixunsxfti.  It is a plain branch, not a call, so __fixunsxfti returns
// directly to the original caller.
// The section is selected explicitly, like every other routine in this
// file, instead of relying on the assembler's initial section.
 .text
02befdf4
ZW
774 .align 16
775 .global __fixunstfti
776 .proc __fixunstfti
777__fixunstfti:
778 { .bbb
779 br.sptk.many __fixunsxfti
780 ;;
781 }
782 .endp __fixunstfti
4e9db8b2 783#endif
c252db20 784#ifdef L_floatditf
// __floattitf: backward-compatibility entry point that simply branches to
// __floattixf.  It is a plain branch, not a call, so __floattixf returns
// directly to the original caller.
// The section is selected explicitly, like every other routine in this
// file, instead of relying on the assembler's initial section.
 .text
02befdf4
ZW
785 .align 16
786 .global __floattitf
787 .proc __floattitf
788__floattitf:
789 { .bbb
790 br.sptk.many __floattixf
791 ;;
792 }
793 .endp __floattitf
02befdf4 794#endif
c252db20 795#endif