/* PLT trampolines.  x86-64 version.
   Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

#include <config.h>
#include <sysdep.h>
#include <link-defines.h>
	.text
	.globl _dl_runtime_resolve
	.type _dl_runtime_resolve, @function
	.align 16
	cfi_startproc
_dl_runtime_resolve:
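	/* On entry the PLT has already pushed the link_map pointer and
	   the relocation index (16 bytes).  The 56-byte local area below
	   keeps %rsp 16-byte aligned for the call to _dl_fixup, assuming
	   the original caller honoured the psABI stack alignment.  Only
	   the integer argument registers are spilled here; the SSE/AVX
	   argument registers are not, since the code reached from
	   _dl_fixup is expected not to touch them.  The rare ld.so paths
	   that do call into libc use _dl_x86_64_save_sse below instead.  */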
	subq $56,%rsp
	cfi_adjust_cfa_offset(72)	# Incorporate PLT
	movq %rax,(%rsp)	# Preserve registers otherwise clobbered.
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)
	movq 64(%rsp), %rsi	# Copy args pushed by PLT in register.
	movq 56(%rsp), %rdi	# %rdi: link_map, %rsi: reloc_index
	call _dl_fixup		# Call resolver.
	movq %rax, %r11		# Save return value
	movq 48(%rsp), %r9	# Get register content back.
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $72, %rsp		# Adjust stack (PLT did 2 pushes)
	cfi_adjust_cfa_offset(-72)
	jmp *%r11		# Jump to function address.
	cfi_endproc
	.size _dl_runtime_resolve, .-_dl_runtime_resolve


#ifndef PROF
	.globl _dl_runtime_profile
	.type _dl_runtime_profile, @function
	.align 16
	cfi_startproc

_dl_runtime_profile:
	/* The La_x86_64_regs data structure pointed to by the
	   fourth parameter must be 16-byte aligned.  This must
	   be explicitly enforced.  We have to set up a dynamically
	   sized stack frame.  %rbx points to the top half which
	   has a fixed size and preserves the original stack pointer.  */

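	/* As in _dl_runtime_resolve, the CFA offset of 48 below covers
	   the 32 bytes of local storage allocated here plus the 16 bytes
	   the PLT pushed (link_map and relocation index).  */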
	subq $32, %rsp		# Allocate the local storage.
	cfi_adjust_cfa_offset(48)	# Incorporate PLT
	movq %rbx, (%rsp)
	cfi_rel_offset(%rbx, 0)

	/* On the stack:
		56(%rbx)	parameter #1
		48(%rbx)	return address

		40(%rbx)	reloc index
		32(%rbx)	link_map

		24(%rbx)	La_x86_64_regs pointer
		16(%rbx)	framesize
		 8(%rbx)	rax
		  (%rbx)	rbx
	*/

	movq %rax, 8(%rsp)
	movq %rsp, %rbx
	cfi_def_cfa_register(%rbx)

	/* Actively align the La_x86_64_regs structure.  */
	andq $0xfffffffffffffff0, %rsp
# ifdef HAVE_AVX_SUPPORT
	/* sizeof(La_x86_64_regs).  Need extra space for 8 SSE registers
	   to detect if any xmm0-xmm7 registers are changed by audit
	   module.  */
	subq $(LR_SIZE + XMM_SIZE*8), %rsp
# else
	subq $LR_SIZE, %rsp	# sizeof(La_x86_64_regs)
# endif
	movq %rsp, 24(%rbx)

	/* Fill the La_x86_64_regs structure.  */
	movq %rdx, LR_RDX_OFFSET(%rsp)
	movq %r8,  LR_R8_OFFSET(%rsp)
	movq %r9,  LR_R9_OFFSET(%rsp)
	movq %rcx, LR_RCX_OFFSET(%rsp)
	movq %rsi, LR_RSI_OFFSET(%rsp)
	movq %rdi, LR_RDI_OFFSET(%rsp)
	movq %rbp, LR_RBP_OFFSET(%rsp)

	leaq 48(%rbx), %rax
	movq %rax, LR_RSP_OFFSET(%rsp)

	/* We always store the XMM registers even if AVX is available.
	   This is to provide backward binary compatibility for existing
	   audit modules.  */
	movaps %xmm0, (LR_XMM_OFFSET)(%rsp)
	movaps %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)
	movaps %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
	movaps %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
	movaps %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
	movaps %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
	movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
	movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

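	/* L(have_avx) (defined below when HAVE_AVX_SUPPORT) caches the
	   result of the CPUID test: 0 means not yet determined, 1 means
	   AVX is available, -1 means it is not.  The same flag is reused
	   by _dl_x86_64_save_sse further down.  */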
# ifdef HAVE_AVX_SUPPORT
	.data
L(have_avx):
	.zero 4
	.size L(have_avx), 4
	.previous

	cmpl $0, L(have_avx)(%rip)
	jne 1f
	movq %rbx, %r11		# Save rbx
	movl $1, %eax
	cpuid
	movq %r11,%rbx		# Restore rbx
	movl $1, %eax
	testl $(1 << 28), %ecx
	jne 2f
	negl %eax
2:	movl %eax, L(have_avx)(%rip)
	cmpl $0, %eax

1:	js L(no_avx1)

	/* This is to support AVX audit modules.  */
	vmovdqu %ymm0, (LR_VECTOR_OFFSET)(%rsp)
	vmovdqu %ymm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
	vmovdqu %ymm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
	vmovdqu %ymm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
	vmovdqu %ymm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
	vmovdqu %ymm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
	vmovdqu %ymm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
	vmovdqu %ymm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)

	/* Save xmm0-xmm7 registers to detect if any of them are
	   changed by audit module.  */
	vmovdqa %xmm0, (LR_SIZE)(%rsp)
	vmovdqa %xmm1, (LR_SIZE + XMM_SIZE)(%rsp)
	vmovdqa %xmm2, (LR_SIZE + XMM_SIZE*2)(%rsp)
	vmovdqa %xmm3, (LR_SIZE + XMM_SIZE*3)(%rsp)
	vmovdqa %xmm4, (LR_SIZE + XMM_SIZE*4)(%rsp)
	vmovdqa %xmm5, (LR_SIZE + XMM_SIZE*5)(%rsp)
	vmovdqa %xmm6, (LR_SIZE + XMM_SIZE*6)(%rsp)
	vmovdqa %xmm7, (LR_SIZE + XMM_SIZE*7)(%rsp)

L(no_avx1):
# endif

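	/* _dl_profile_fixup (see elf/dl-runtime.c) takes the link_map,
	   the relocation index, the return address, a pointer to the
	   La_x86_64_regs block and a pointer to the framesize slot, and
	   returns the resolved function address in %rax.  */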
	movq %rsp, %rcx		# La_x86_64_regs pointer to %rcx.
	movq 48(%rbx), %rdx	# Load return address if needed.
	movq 40(%rbx), %rsi	# Copy args pushed by PLT in register.
	movq 32(%rbx), %rdi	# %rdi: link_map, %rsi: reloc_index
	leaq 16(%rbx), %r8	# %r8: pointer to the framesize slot.
	call _dl_profile_fixup	# Call resolver.

	movq %rax, %r11		# Save return value.

	movq 8(%rbx), %rax	# Get back register content.
	movq LR_RDX_OFFSET(%rsp), %rdx
	movq LR_R8_OFFSET(%rsp), %r8
	movq LR_R9_OFFSET(%rsp), %r9

	movaps (LR_XMM_OFFSET)(%rsp), %xmm0
	movaps (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
	movaps (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
	movaps (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
	movaps (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
	movaps (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
	movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
	movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7

# ifdef HAVE_AVX_SUPPORT
	cmpl $0, L(have_avx)(%rip)
	js L(no_avx2)

	/* Check if any xmm0-xmm7 registers are changed by audit
	   module.  */
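	/* For each register: if the audit module left the xmm value
	   untouched, reload the caller's full ymm register from the
	   vector save area (the upper halves may have been clobbered by
	   the call above); otherwise keep the 128-bit value supplied by
	   the audit module.  */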
	vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	jne 1f
	vmovdqu (LR_VECTOR_OFFSET)(%rsp), %ymm0

1:	vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	jne 1f
	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	jne 1f
	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	jne 1f
	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	jne 1f
	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	jne 1f
	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	jne 1f
	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	jne 1f
	vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7

L(no_avx2):
1:
# endif
	movq 16(%rbx), %r10	# Anything in framesize?
	testq %r10, %r10
	jns 3f

	/* The framesize is negative, so there will be no call to
	   _dl_call_pltexit.  */

	/* Get back register content.  */
	movq LR_RCX_OFFSET(%rsp), %rcx
	movq LR_RSI_OFFSET(%rsp), %rsi
	movq LR_RDI_OFFSET(%rsp), %rdi

	movq %rbx, %rsp
	movq (%rsp), %rbx
	cfi_restore(rbx)
	cfi_def_cfa_register(%rsp)

	addq $48, %rsp		# Adjust the stack to the return value
				# (eats the reloc index and link_map).
	cfi_adjust_cfa_offset(-48)
	jmp *%r11		# Jump to function address.

3:
	cfi_adjust_cfa_offset(48)
	cfi_rel_offset(%rbx, 0)
	cfi_def_cfa_register(%rbx)

	/* At this point we need to prepare a new stack for the function
	   which has to be called.  We copy the original stack to a
	   temporary buffer of the size specified by the 'framesize'
	   returned from _dl_profile_fixup.  */

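	/* The copy source is the first stack argument, at 56(%rbx) in the
	   layout above; LR_RSP_OFFSET happens to have the same value and
	   is reused here.  The framesize in %r10 is rounded so that the
	   copied frame keeps the stack 16-byte aligned.  */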
	leaq LR_RSP_OFFSET(%rbx), %rsi	# stack
	addq $8, %r10
	andq $0xfffffffffffffff0, %r10
	movq %r10, %rcx
	subq %r10, %rsp
	movq %rsp, %rdi
	shrq $3, %rcx
	rep
	movsq

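	/* After the rep movsq, %rdi points right past the copied
	   arguments, i.e. at the La_x86_64_regs block, so 24/32/40(%rdi)
	   are the slots written via LR_RCX_OFFSET/LR_RSI_OFFSET/
	   LR_RDI_OFFSET above (assuming the usual La_x86_64_regs
	   layout).  */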
	movq 24(%rdi), %rcx	# Get back register content.
	movq 32(%rdi), %rsi
	movq 40(%rdi), %rdi

	call *%r11

	movq 24(%rbx), %rsp	# Drop the copied stack content.

	/* Now we have to prepare the La_x86_64_retval structure for
	   _dl_call_pltexit.  The La_x86_64_regs is pointed to by %rsp now,
	   so we just need to allocate the sizeof(La_x86_64_retval) space on
	   the stack, since the alignment has already been taken care of.  */
# ifdef HAVE_AVX_SUPPORT
	/* sizeof(La_x86_64_retval).  Need extra space for 2 SSE
	   registers to detect if xmm0/xmm1 registers are changed
	   by audit module.  */
	subq $(LRV_SIZE + XMM_SIZE*2), %rsp
# else
	subq $LRV_SIZE, %rsp	# sizeof(La_x86_64_retval)
# endif
	movq %rsp, %rcx		# La_x86_64_retval argument to %rcx.

	/* Fill in the La_x86_64_retval structure.  */
	movq %rax, LRV_RAX_OFFSET(%rcx)
	movq %rdx, LRV_RDX_OFFSET(%rcx)

	movaps %xmm0, LRV_XMM0_OFFSET(%rcx)
	movaps %xmm1, LRV_XMM1_OFFSET(%rcx)

# ifdef HAVE_AVX_SUPPORT
	cmpl $0, L(have_avx)(%rip)
	js L(no_avx3)

	/* This is to support AVX audit modules.  */
	vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx)
	vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx)

	/* Save xmm0/xmm1 registers to detect if they are changed
	   by audit module.  */
	vmovdqa %xmm0, (LRV_SIZE)(%rcx)
	vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx)

L(no_avx3):
# endif

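	/* long double return values live on the x87 stack (%st(0), and
	   %st(1) for _Complex long double); fstpt pops, so they are
	   reloaded in reverse order after _dl_call_pltexit.  */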
	fstpt LRV_ST0_OFFSET(%rcx)
	fstpt LRV_ST1_OFFSET(%rcx)

	movq 24(%rbx), %rdx	# La_x86_64_regs argument to %rdx.
	movq 40(%rbx), %rsi	# Copy args pushed by PLT in register.
	movq 32(%rbx), %rdi	# %rdi: link_map, %rsi: reloc_index
	call _dl_call_pltexit

	/* Restore return registers.  */
	movq LRV_RAX_OFFSET(%rsp), %rax
	movq LRV_RDX_OFFSET(%rsp), %rdx

	movaps LRV_XMM0_OFFSET(%rsp), %xmm0
	movaps LRV_XMM1_OFFSET(%rsp), %xmm1

# ifdef HAVE_AVX_SUPPORT
	cmpl $0, L(have_avx)(%rip)
	js L(no_avx4)

	/* Check if the xmm0/xmm1 registers are changed by the audit
	   module; as above, reload the full ymm registers only if they
	   were left untouched.  */
	vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2
	vpmovmskb %xmm2, %esi
	cmpl $0xffff, %esi
	jne 1f
	vmovdqu LRV_VECTOR0_OFFSET(%rsp), %ymm0

1:	vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
	vpmovmskb %xmm2, %esi
	cmpl $0xffff, %esi
	jne 1f
	vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1

L(no_avx4):
1:
# endif

	fldt LRV_ST1_OFFSET(%rsp)
	fldt LRV_ST0_OFFSET(%rsp)

	movq %rbx, %rsp
	movq (%rsp), %rbx
	cfi_restore(rbx)
	cfi_def_cfa_register(%rsp)

	addq $48, %rsp		# Adjust the stack to the return value
				# (eats the reloc index and link_map).
	cfi_adjust_cfa_offset(-48)
	retq

	cfi_endproc
	.size _dl_runtime_profile, .-_dl_runtime_profile
#endif


#ifdef SHARED
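	/* _dl_x86_64_save_sse and _dl_x86_64_restore_sse spill and reload
	   the vector argument registers into a per-thread save area in
	   the TCB (%fs:RTLD_SAVESPACE_SSE).  ld.so itself avoids touching
	   the SSE/AVX registers, so they are saved only on demand around
	   the rare calls into code that may clobber them (such as libc's
	   malloc).  The AVX check duplicates the CPUID test above and
	   shares the L(have_avx) flag.  */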
	.globl _dl_x86_64_save_sse
	.type _dl_x86_64_save_sse, @function
	.align 16
	cfi_startproc
_dl_x86_64_save_sse:
# ifdef HAVE_AVX_SUPPORT
	cmpl $0, L(have_avx)(%rip)
	jne 1f
	movq %rbx, %r11		# Save rbx
	movl $1, %eax
	cpuid
	movq %r11,%rbx		# Restore rbx
	movl $1, %eax
	testl $(1 << 28), %ecx
	jne 2f
	negl %eax
2:	movl %eax, L(have_avx)(%rip)
	cmpl $0, %eax

1:	js L(no_avx5)

#  define YMM_SIZE 32
	vmovdqa %ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE
	vmovdqa %ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE
	vmovdqa %ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE
	vmovdqa %ymm3, %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE
	vmovdqa %ymm4, %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE
	vmovdqa %ymm5, %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE
	vmovdqa %ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE
	vmovdqa %ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE
	ret
L(no_avx5):
# endif
	movdqa %xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE
	movdqa %xmm1, %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE
	movdqa %xmm2, %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE
	movdqa %xmm3, %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE
	movdqa %xmm4, %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE
	movdqa %xmm5, %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE
	movdqa %xmm6, %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE
	movdqa %xmm7, %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE
	ret
	cfi_endproc
	.size _dl_x86_64_save_sse, .-_dl_x86_64_save_sse


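	/* Reload the vector registers from the per-thread save area that
	   _dl_x86_64_save_sse filled in above.  */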
	.globl _dl_x86_64_restore_sse
	.type _dl_x86_64_restore_sse, @function
	.align 16
	cfi_startproc
_dl_x86_64_restore_sse:
# ifdef HAVE_AVX_SUPPORT
	cmpl $0, L(have_avx)(%rip)
	js L(no_avx6)

	vmovdqa %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0
	vmovdqa %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1
	vmovdqa %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE, %ymm2
	vmovdqa %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE, %ymm3
	vmovdqa %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE, %ymm4
	vmovdqa %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE, %ymm5
	vmovdqa %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6
	vmovdqa %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7
	ret
L(no_avx6):
# endif
	movdqa %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0
	movdqa %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE, %xmm1
	movdqa %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE, %xmm2
	movdqa %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE, %xmm3
	movdqa %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE, %xmm4
	movdqa %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE, %xmm5
	movdqa %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE, %xmm6
	movdqa %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE, %xmm7
	ret
	cfi_endproc
	.size _dl_x86_64_restore_sse, .-_dl_x86_64_restore_sse
#endif