]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/ia64/dl-trampoline.S
ia64: move from main tree
[thirdparty/glibc.git] / sysdeps / ia64 / dl-trampoline.S
1 /* PLT trampolines. ia64 version.
2 Copyright (C) 2005 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307 USA. */
19
20 #include <sysdep.h>
21 #undef ret
22
23 /*
24 This code is used in dl-runtime.c to call the `_dl_fixup' function
25 and then redirect to the address it returns. `_dl_fixup()' takes two
26 arguments, however _dl_profile_fixup() takes five.
27
28 The ABI specifies that we will never see more than 8 input
29 registers to a function call, thus it is safe to simply allocate
30 those, and simpler than playing stack games. */
31
32 /* Size of the stack frame used to save and restore the 8 incoming fp
   registers f8-f15, 16 bytes per register. */
33 #define RESOLVE_FRAME_SIZE (16*8)
34
/* _dl_runtime_resolve

   Trampoline that calls _dl_fixup and then branches to the address it
   returns.

   On entry r15 and r16 have been set up by the code that jumped here;
   they are forwarded as out1 = r15 * 24 and out0 = r16.
   NOTE(review): presumably r15 is the relocation index and r16
   identifies the calling object, both supplied by the PLT stub --
   confirm against the PLT code.

   r8-r11 are kept in loc2-loc5 and f8-f15 are spilled to the stack so
   that they survive the call.  This code never writes in0-in7.

   After the call, ret0 becomes the branch target (b6) and ret1 is
   loaded into gp.  b0, ar.pfs, r12, r8-r11 and f8-f15 are put back to
   their entry values, the register frame is replaced by one with 8
   outputs, and control branches (not calls) to b6. */
35 ENTRY(_dl_runtime_resolve)
36 { .mmi
37 .prologue
38 .save ar.pfs, r40
39 alloc loc0 = ar.pfs, 8, 6, 2, 0 /* 8 inputs, 6 locals, 2 outputs */
40 /* Use the 16 byte scratch area. r2 will start at the slot for f8
41 and r3 at the slot for f9. Relative to the new sp set up below
   these are sp + 16 and sp + 32. Both pointers advance by 32 per
   store, so the slot for f15 is at the entry value of r12. */
42 adds r2 = -(RESOLVE_FRAME_SIZE - 16), r12
43 adds r3 = -(RESOLVE_FRAME_SIZE - 32), r12
44 }
45 { .mii
46 .fframe RESOLVE_FRAME_SIZE
47 adds r12 = -RESOLVE_FRAME_SIZE, r12 /* allocate the frame */
48 .save rp, loc1
49 mov loc1 = b0 /* keep the return address across the call */
50 .body
51 mov loc2 = r8 /* preserve struct value register */
52 ;;
53 }
54 { .mii
55 mov loc3 = r9 /* preserve language specific register */
56 mov loc4 = r10 /* preserve language specific register */
57 mov loc5 = r11 /* preserve language specific register */
58 }
59 { .mmi
60 stf.spill [r2] = f8, 32
61 stf.spill [r3] = f9, 32
62 mov out0 = r16 /* first argument of _dl_fixup */
63 ;;
64 }
65 { .mmi
66 stf.spill [r2] = f10, 32
67 stf.spill [r3] = f11, 32
68 shl out1 = r15, 4 /* out1 = r15 * 16 */
69 ;;
70 }
71 { .mmi
72 stf.spill [r2] = f12, 32
73 stf.spill [r3] = f13, 32
74 /* Relocation record is 24 byte, so the second argument of
   _dl_fixup is r15 * 16 + r15 * 8 = r15 * 24. */
75 shladd out1 = r15, 3, out1
76 ;;
77 }
78 { .mmb
79 stf.spill [r2] = f14 /* last pair: no post-increment needed */
80 stf.spill [r3] = f15
81 br.call.sptk.many b0 = _dl_fixup
82 }
83 { .mii
84 /* Skip the 16byte scratch area. */
85 adds r2 = 16, r12
86 adds r3 = 32, r12
87 mov b6 = ret0 /* first return value: final branch target */
88 ;;
89 }
90 { .mmi
91 ldf.fill f8 = [r2], 32
92 ldf.fill f9 = [r3], 32
93 mov b0 = loc1 /* restore the return address */
94 ;;
95 }
96 { .mmi
97 ldf.fill f10 = [r2], 32
98 ldf.fill f11 = [r3], 32
99 mov gp = ret1 /* second return value: gp for the target */
100 ;;
101 }
102 { .mmi
103 ldf.fill f12 = [r2], 32
104 ldf.fill f13 = [r3], 32
105 mov ar.pfs = loc0 /* restore the entry value of ar.pfs */
106 ;;
107 }
108 { .mmi
109 ldf.fill f14 = [r2], 32
110 ldf.fill f15 = [r3], 32
111 .restore sp /* pop the unwind frame state */
112 adds r12 = RESOLVE_FRAME_SIZE, r12
113 ;;
114 }
115 { .mii
116 mov r9 = loc3 /* restore language specific register */
117 mov r10 = loc4 /* restore language specific register */
118 mov r11 = loc5 /* restore language specific register */
119 }
120 { .mii
121 mov r8 = loc2 /* restore struct value register */
122 ;;
123 }
124 /* An alloc is needed for the break system call to work.
125 We don't care about the old value of the pfs register.
   The locals must not be read after this alloc, which is why every
   restore from loc0-loc5 is done above. */
126 { .mmb
127 .prologue
128 .body
129 alloc r2 = ar.pfs, 0, 0, 8, 0
130 br.sptk.many b6 /* branch, not call: b0 still holds the entry value */
131 ;;
132 }
133 END(_dl_runtime_resolve)
134
135
136 /* The fourth argument to _dl_profile_fixup and the third one to
137 _dl_call_pltexit are a pointer to La_ia64_regs:
138
139 8byte r8
140 8byte r9
141 8byte r10
142 8byte r11
143 8byte in0
144 8byte in1
145 8byte in2
146 8byte in3
147 8byte in4
148 8byte in5
149 8byte in6
150 8byte in7
151 16byte f8
152 16byte f9
153 16byte f10
154 16byte f11
155 16byte f12
156 16byte f13
157 16byte f14
158 16byte f15
159 8byte ar.unat
160 8byte sp
161
162 The fifth argument to _dl_profile_fixup is a pointer to long int.
163 The fourth argument to _dl_call_pltexit is a pointer to
164 La_ia64_retval:
165
166 8byte r8
167 8byte r9
168 8byte r10
169 8byte r11
170 16byte f8
171 16byte f9
172 16byte f10
173 16byte f11
174 16byte f12
175 16byte f13
176 16byte f14
177 16byte f15
178
179 Since the stack has to be 16-byte aligned, the stack is allocated in
180 16-byte increments. Before calling _dl_profile_fixup, the stack will
181 look like
182
183 psp new frame_size
184 +16 La_ia64_regs
185 sp scratch
186
187 */
188
/* Frame built before calling _dl_profile_fixup: La_ia64_regs (r8-r11,
   the 8 incoming GRs, 8 FRs of 16 bytes each, ar.unat and sp) plus
   the 16 byte scratch area. */
189 #define PLTENTER_FRAME_SIZE (4*8 + 8*8 + 8*16 + 2*8 + 16)
/* Frame built before calling _dl_call_pltexit: the frame above plus
   La_ia64_retval (r8-r11 and 8 FRs of 16 bytes each). */
190 #define PLTEXIT_FRAME_SIZE (PLTENTER_FRAME_SIZE + 4*8 + 8*16)
191
192 #ifndef PROF
/* _dl_runtime_profile

   Trampoline that calls _dl_profile_fixup and, depending on the frame
   size it reports, either branches to the resolved function or calls
   it and then calls _dl_call_pltexit.

   1. Save ar.pfs, b0, ar.lc, gp and the entry sp in locals, fill in
      La_ia64_regs at sp + 16 and call
        _dl_profile_fixup (r16, r15 * 24, b0, &La_ia64_regs, entry sp).
      The fifth argument is where the frame size is written; it is
      read back from [loc10] afterwards.
   2. Restore f8-f15 and r8-r11; ret0 becomes b6 and ret1 becomes gp.
   3. If the frame size is -1, restore b0, ar.lc, ar.pfs and sp and
      branch to b6.
   4. Otherwise, if the frame size is non-zero, round it up to a
      multiple of 16 and copy that many bytes from entry sp + 16 to
      the new sp + 16.  Call b6 with out0-out7 = in0-in7, store r8-r11
      and f8-f15 into La_ia64_retval, call
        _dl_call_pltexit (saved r16, saved r15 * 24, &La_ia64_regs,
                          &La_ia64_retval),
      reload r8-r11 and f8-f15 from La_ia64_retval and return through
      b0. */
193 ENTRY(_dl_runtime_profile)
194 { .mii
195 .prologue
196 .save ar.pfs, r40
197 alloc loc0 = ar.pfs, 8, 12, 8, 0 /* 8 inputs, 12 locals, 8 outputs */
198 .vframe loc10
199 mov loc10 = r12 /* entry sp, used to address and pop the frames */
200 .save rp, loc1
201 mov loc1 = b0
202 }
203 { .mii
204 .save ar.unat, r17
205 mov r17 = ar.unat /* entry ar.unat, restored after the spills below */
206 .save ar.lc, loc6
207 mov loc6 = ar.lc /* ar.lc is used by the copy loop */
208 mov loc11 = gp /* gp is needed again for _dl_call_pltexit */
209 }
210 { .mii
211 .body
212 /* There is a 16 byte scratch area. r2 will start at r8 and
213 r3 will start at r9 for La_ia64_regs. */
214 adds r2 = -(PLTENTER_FRAME_SIZE - 16), r12
215 adds r3 = -(PLTENTER_FRAME_SIZE - 24), r12
216 adds r12 = -PLTENTER_FRAME_SIZE, r12
217 ;;
218 }
219 { .mmi
220 st8 [r2] = r8, 16;
221 st8 [r3] = r9, 16;
222 mov out2 = b0 /* needed by _dl_profile_fixup */
223 ;;
224 }
225 { .mmi
226 st8 [r2] = r10, 16;
227 st8 [r3] = r11, 16;
228 adds out3 = 16, r12 /* pointer to La_ia64_regs */
229 ;;
230 }
231 { .mmi
232 .mem.offset 0, 0
233 st8.spill [r2] = in0, 16
234 .mem.offset 8, 0
235 st8.spill [r3] = in1, 16
236 mov out4 = loc10 /* pointer to new frame size */
237 ;;
238 }
239 { .mmi
240 .mem.offset 0, 0
241 st8.spill [r2] = in2, 16
242 .mem.offset 8, 0
243 st8.spill [r3] = in3, 16
244 mov loc2 = r8 /* preserve struct value register */
245 ;;
246 }
247 { .mmi
248 .mem.offset 0, 0
249 st8.spill [r2] = in4, 16
250 .mem.offset 8, 0
251 st8.spill [r3] = in5, 16
252 mov loc3 = r9 /* preserve language specific register */
253 ;;
254 }
/* in6 and in7 are spilled exactly like in0-in5. A plain st8 of a
   register whose NaT bit is set would raise a Register NaT Consumption
   fault, and the NaT bits of in6/in7 would be missing from the ar.unat
   value stored in La_ia64_regs below. */
255 { .mmi
256 .mem.offset 0, 0
257 st8.spill [r2] = in6, 16
258 .mem.offset 8, 0
259 st8.spill [r3] = in7, 24 /* adjust for f9 */
260 mov loc4 = r10 /* preserve language specific register */
261 ;;
262 }
263 { .mii
264 mov r18 = ar.unat /* save it in La_ia64_regs */
265 mov loc7 = out3 /* save it for _dl_call_pltexit */
266 mov loc5 = r11 /* preserve language specific register */
267 }
268 { .mmi
269 stf.spill [r2] = f8, 32
270 stf.spill [r3] = f9, 32
271 mov out0 = r16 /* needed by _dl_profile_fixup */
272 ;;
273 }
274 { .mii
275 mov ar.unat = r17 /* restore it for function call */
276 mov loc8 = r16 /* save it for _dl_call_pltexit */
277 nop.i 0x0
278 }
279 { .mmi
280 stf.spill [r2] = f10, 32
281 stf.spill [r3] = f11, 32
282 shl out1 = r15, 4 /* out1 = r15 * 16 */
283 ;;
284 }
285 { .mmi
286 stf.spill [r2] = f12, 32
287 stf.spill [r3] = f13, 32
288 /* Relocation record is 24 byte, so out1 = r15 * 16 + r15 * 8. */
289 shladd out1 = r15, 3, out1
290 ;;
291 }
292 { .mmi
293 stf.spill [r2] = f14, 32
294 stf.spill [r3] = f15, 24 /* adjust for the 8 byte sp slot */
295 mov loc9 = out1 /* save it for _dl_call_pltexit */
296 ;;
297 }
298 { .mmb
299 st8 [r2] = r18 /* store ar.unat */
300 st8 [r3] = loc10 /* store sp */
301 br.call.sptk.many b0 = _dl_profile_fixup
302 }
303 { .mii
304 /* Skip the 16byte scratch area, 4 language specific GRs and
305 8 incoming GRs to restore incoming fp registers. */
306 adds r2 = (4*8 + 8*8 + 16), r12
307 adds r3 = (4*8 + 8*8 + 32), r12
308 mov b6 = ret0 /* first return value: address to branch to or call */
309 ;;
310 }
311 { .mmi
312 ldf.fill f8 = [r2], 32
313 ldf.fill f9 = [r3], 32
314 mov gp = ret1 /* second return value: gp for the target */
315 ;;
316 }
317 { .mmi
318 ldf.fill f10 = [r2], 32
319 ldf.fill f11 = [r3], 32
320 mov r8 = loc2 /* restore struct value register */
321 ;;
322 }
323 { .mmi
324 ldf.fill f12 = [r2], 32
325 ldf.fill f13 = [r3], 32
326 mov r9 = loc3 /* restore language specific register */
327 ;;
328 }
329 { .mmi
330 ldf.fill f14 = [r2], 32
331 ldf.fill f15 = [r3], 32
332 mov r10 = loc4 /* restore language specific register */
333 ;;
334 }
335 { .mii
336 ld8 r15 = [loc10] /* load the new frame size */
337 mov r11 = loc5 /* restore language specific register */
338 ;;
339 cmp.eq p6, p7 = -1, r15 /* p6: frame size is -1, just branch to b6 */
340 ;;
341 }
342 { .mii
343 (p7) cmp.eq p8, p9 = 0, r15 /* p8: nothing to copy, p9: copy needed */
344 (p6) mov b0 = loc1
345 (p6) mov ar.lc = loc6
346 }
347 { .mib
348 nop.m 0x0
349 (p6) mov ar.pfs = loc0
350 (p6) br.cond.dptk.many .Lresolved
351 ;;
352 }
353
354 /* At this point, the stack looks like
355
356 +psp free
357 +16 La_ia64_regs
358 sp scratch
359
360 We need to keep the current stack and call the resolved
361 function by copying the r15 byte from sp + PLTENTER_FRAME_SIZE
362 + 16 (scratch area) to sp + 16 (scratch area). Since stack
363 has to be 16byte aligned, we round r15 up to 16byte. */
364
365 { .mbb
366 (p9) adds r15 = 15, r15
367 (p8) br.cond.dptk.many .Lno_new_frame
368 nop.b 0x0
369 ;;
370 }
371 { .mmi
372 and r15 = -16, r15
373 ;;
374 /* We don't copy the 16byte scratch area. Prepare r16/r17 as
375 destination. */
376 sub r16 = r12, r15
377 sub r17 = r12, r15
378 ;;
379 }
380 { .mii
381 adds r16 = 16, r16
382 adds r17 = 24, r17
383 sub r12 = r12, r15 /* Adjust stack */
384 ;;
385 }
386 { .mii
387 nop.m 0x0
388 shr r15 = r15, 4 /* number of 16 byte units to copy */
389 ;;
390 adds r15 = -1, r15 /* br.cloop runs ar.lc + 1 iterations */
391 ;;
392 }
393 { .mii
394 /* Skip the 16byte scratch area. Prepare r2/r3 as source. */
395 adds r2 = 16, loc10
396 adds r3 = 24, loc10
397 mov ar.lc = r15
398 ;;
399 }
400 .Lcopy:
401 { .mmi
402 ld8 r18 = [r2], 16
403 ld8 r19 = [r3], 16
404 nop.i 0x0
405 ;;
406 }
407 { .mmb
408 st8 [r16] = r18, 16
409 st8 [r17] = r19, 16
410 br.cloop.sptk.few .Lcopy
411 }
412 .Lno_new_frame:
/* Pass the incoming register arguments on unchanged. */
413 { .mii
414 mov out0 = in0
415 mov out1 = in1
416 mov out2 = in2
417 }
418 { .mii
419 mov out3 = in3
420 mov out4 = in4
421 mov out5 = in5
422 }
423 { .mib
424 mov out6 = in6
425 mov out7 = in7
426 /* Call the resolved function */
427 br.call.sptk.many b0 = b6
428 }
429 { .mii
430 /* Prepare stack for _dl_call_pltexit. Loc10 has the original
431 stack pointer. r2/r3 start at the r8/r9 slots of
   La_ia64_retval, just above the 16 byte scratch area. */
432 adds r12 = -PLTEXIT_FRAME_SIZE, loc10
433 adds r2 = -(PLTEXIT_FRAME_SIZE - 16), loc10
434 adds r3 = -(PLTEXIT_FRAME_SIZE - 24), loc10
435 ;;
436 }
437 { .mmi
438 /* Store all possible return values into the La_ia64_retval buffer. */
439 st8 [r2] = r8, 16
440 st8 [r3] = r9, 16
441 mov out0 = loc8 /* r16 as passed to _dl_profile_fixup */
442 ;;
443 }
444 { .mmi
445 st8 [r2] = r10, 16
446 st8 [r3] = r11, 24 /* adjust for f9 */
447 mov out1 = loc9 /* r15 * 24 as passed to _dl_profile_fixup */
448 ;;
449 }
450 { .mmi
451 stf.spill [r2] = f8, 32
452 stf.spill [r3] = f9, 32
453 mov out2 = loc7 /* Pointer to La_ia64_regs */
454 ;;
455 }
456 { .mmi
457 stf.spill [r2] = f10, 32
458 stf.spill [r3] = f11, 32
459 adds out3 = 16, r12 /* Pointer to La_ia64_retval */
460 ;;
461 }
462 { .mmi
463 stf.spill [r2] = f12, 32
464 stf.spill [r3] = f13, 32
465 /* We need to restore gp for _dl_call_pltexit. */
466 mov gp = loc11
467 ;;
468 }
469 { .mmb
470 stf.spill [r2] = f14
471 stf.spill [r3] = f15
472 br.call.sptk.many b0 = _dl_call_pltexit
473 }
474 { .mmi
475 /* Load all the non-floating and floating return values. Skip
476 the 16byte scratch area. */
477 adds r2 = 16, r12
478 adds r3 = 24, r12
479 nop.i 0x0
480 ;;
481 }
482 { .mmi
483 ld8 r8 = [r2], 16
484 ld8 r9 = [r3], 16
485 nop.i 0x0
486 ;;
487 }
488 { .mmi
489 ld8 r10 = [r2], 16
490 ld8 r11 = [r3], 24 /* adjust for f9 */
491 nop.i 0x0
492 ;;
493 }
494 { .mmi
495 ldf.fill f8 = [r2], 32
496 ldf.fill f9 = [r3], 32
497 mov ar.lc = loc6
498 ;;
499 }
500 { .mmi
501 ldf.fill f10 = [r2], 32
502 ldf.fill f11 = [r3], 32
503 mov ar.pfs = loc0
504 ;;
505 }
506 { .mmi
507 ldf.fill f12 = [r2], 32
508 ldf.fill f13 = [r3], 32
509 mov b0 = loc1
510 ;;
511 }
512 { .mmi
513 ldf.fill f14 = [r2]
514 ldf.fill f15 = [r3]
515 /* We know that the previous stack pointer, loc10, isn't 0.
516 We use it to set p7 so that the br.ret below is taken. */
517 cmp.ne p7, p0 = 0, loc10
518 ;;
519 }
520 .Lresolved:
/* Reached with p7 clear from the frame size == -1 path (fall through
   to the branch to b6) and with p7 set after _dl_call_pltexit (return
   through b0). */
521 { .mmb
522 .restore sp
523 mov r12 = loc10
524 (p7) br.ret.sptk.many b0
525 ;;
526 }
527 /* An alloc is needed for the break system call to work. We
528 don't care about the old value of the pfs register. After
529 this alloc, we can't use any rotating registers. Otherwise
530 assembler won't be happy. This has to be at the end. */
531 { .mmb
532 .prologue
533 .body
534 alloc r2 = ar.pfs, 0, 0, 8, 0
535 br.sptk.many b6
536 ;;
537 }
538 END(_dl_runtime_profile)
539 #endif