/* PLT trampolines.  x86-64 version.
   Copyright (C) 2004-2015 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <config.h>
#include <sysdep.h>
#include <link-defines.h>

#if (RTLD_SAVESPACE_SSE % 32) != 0
# error RTLD_SAVESPACE_SSE must be aligned to 32 bytes
#endif

/* Area on stack to save and restore registers used for parameter
   passing when calling _dl_fixup.  */
#ifdef __ILP32__
/* X32 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX.  */
# define REGISTER_SAVE_AREA	(8 * 7)
# define REGISTER_SAVE_RAX	0
#else
/* X86-64 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as BND0,
   BND1, BND2, BND3.  */
# define REGISTER_SAVE_AREA	(8 * 7 + 16 * 4)
/* Align bound register save area to 16 bytes.  */
# define REGISTER_SAVE_BND0	0
# define REGISTER_SAVE_BND1	(REGISTER_SAVE_BND0 + 16)
# define REGISTER_SAVE_BND2	(REGISTER_SAVE_BND1 + 16)
# define REGISTER_SAVE_BND3	(REGISTER_SAVE_BND2 + 16)
# define REGISTER_SAVE_RAX	(REGISTER_SAVE_BND3 + 16)
#endif
#define REGISTER_SAVE_RCX	(REGISTER_SAVE_RAX + 8)
#define REGISTER_SAVE_RDX	(REGISTER_SAVE_RCX + 8)
#define REGISTER_SAVE_RSI	(REGISTER_SAVE_RDX + 8)
#define REGISTER_SAVE_RDI	(REGISTER_SAVE_RSI + 8)
#define REGISTER_SAVE_R8	(REGISTER_SAVE_RDI + 8)
#define REGISTER_SAVE_R9	(REGISTER_SAVE_R8 + 8)

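/* For reference, on x86-64 the defines above work out to the following
   layout (offsets in bytes from %rsp after the subq in
   _dl_runtime_resolve):
     0..63    BND0-BND3 (16 bytes each)
     64..119  RAX, RCX, RDX, RSI, RDI, R8, R9 (8 bytes each)
   for a total of REGISTER_SAVE_AREA = 120 bytes; on x32 the bound
   registers are omitted and the area is 56 bytes.  */
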
	.text
	.globl _dl_runtime_resolve
	.type _dl_runtime_resolve, @function
	.align 16
	cfi_startproc
_dl_runtime_resolve:
	cfi_adjust_cfa_offset(16)	# Incorporate PLT
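	# At entry the PLT has pushed two 8-byte words: the PLT entry
	# pushed the relocation index and PLT0 pushed the link_map
	# pointer (now at the top of the stack), with the caller's
	# return address above both; hence the CFA adjustment of 16.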
	subq $REGISTER_SAVE_AREA,%rsp
	cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
	# Preserve registers otherwise clobbered.
	movq %rax, REGISTER_SAVE_RAX(%rsp)
	movq %rcx, REGISTER_SAVE_RCX(%rsp)
	movq %rdx, REGISTER_SAVE_RDX(%rsp)
	movq %rsi, REGISTER_SAVE_RSI(%rsp)
	movq %rdi, REGISTER_SAVE_RDI(%rsp)
	movq %r8, REGISTER_SAVE_R8(%rsp)
	movq %r9, REGISTER_SAVE_R9(%rsp)
#ifndef __ILP32__
	# We also have to preserve bound registers.  These are nops if
	# Intel MPX isn't available or disabled.
# ifdef HAVE_MPX_SUPPORT
	bndmov %bnd0, REGISTER_SAVE_BND0(%rsp)
	bndmov %bnd1, REGISTER_SAVE_BND1(%rsp)
	bndmov %bnd2, REGISTER_SAVE_BND2(%rsp)
	bndmov %bnd3, REGISTER_SAVE_BND3(%rsp)
# else
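	# The .byte sequences below hand-encode the same four bndmov
	# stores for assemblers without MPX support; the matching
	# encoded loads appear in the restore path further down.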
	.byte 0x66,0x0f,0x1b,0x44,0x24,REGISTER_SAVE_BND0
	.byte 0x66,0x0f,0x1b,0x4c,0x24,REGISTER_SAVE_BND1
	.byte 0x66,0x0f,0x1b,0x54,0x24,REGISTER_SAVE_BND2
	.byte 0x66,0x0f,0x1b,0x5c,0x24,REGISTER_SAVE_BND3
# endif
#endif
	# Copy the arguments the PLT pushed into registers.
	# %rdi: link_map, %rsi: reloc_index
	movq (REGISTER_SAVE_AREA + 8)(%rsp), %rsi
	movq REGISTER_SAVE_AREA(%rsp), %rdi
	call _dl_fixup		# Call resolver.
	movq %rax, %r11		# Save return value.
#ifndef __ILP32__
	# Restore bound registers.  These are nops if Intel MPX isn't
	# available or disabled.
# ifdef HAVE_MPX_SUPPORT
	bndmov REGISTER_SAVE_BND3(%rsp), %bnd3
	bndmov REGISTER_SAVE_BND2(%rsp), %bnd2
	bndmov REGISTER_SAVE_BND1(%rsp), %bnd1
	bndmov REGISTER_SAVE_BND0(%rsp), %bnd0
# else
	.byte 0x66,0x0f,0x1a,0x5c,0x24,REGISTER_SAVE_BND3
	.byte 0x66,0x0f,0x1a,0x54,0x24,REGISTER_SAVE_BND2
	.byte 0x66,0x0f,0x1a,0x4c,0x24,REGISTER_SAVE_BND1
	.byte 0x66,0x0f,0x1a,0x44,0x24,REGISTER_SAVE_BND0
# endif
#endif
	# Get register contents back.
	movq REGISTER_SAVE_R9(%rsp), %r9
	movq REGISTER_SAVE_R8(%rsp), %r8
	movq REGISTER_SAVE_RDI(%rsp), %rdi
	movq REGISTER_SAVE_RSI(%rsp), %rsi
	movq REGISTER_SAVE_RDX(%rsp), %rdx
	movq REGISTER_SAVE_RCX(%rsp), %rcx
	movq REGISTER_SAVE_RAX(%rsp), %rax
	# Adjust the stack (the PLT did 2 pushes).
	addq $(REGISTER_SAVE_AREA + 16), %rsp
	cfi_adjust_cfa_offset(-(REGISTER_SAVE_AREA + 16))
	jmp *%r11		# Jump to function address.
	cfi_endproc
	.size _dl_runtime_resolve, .-_dl_runtime_resolve


#ifndef PROF
	.globl _dl_runtime_profile
	.type _dl_runtime_profile, @function
	.align 16
	cfi_startproc

_dl_runtime_profile:
	cfi_adjust_cfa_offset(16)	# Incorporate PLT
	/* The La_x86_64_regs data structure pointed to by the
	   fourth parameter must be 16-byte aligned.  This must
	   be explicitly enforced.  We have to set up a dynamically
	   sized stack frame.  %rbx points to the top half, which
	   has a fixed size and preserves the original stack pointer.  */

	subq $32, %rsp		# Allocate the local storage.
	cfi_adjust_cfa_offset(32)
	movq %rbx, (%rsp)
	cfi_rel_offset(%rbx, 0)

	/* On the stack:
		56(%rbx)	parameter #1
		48(%rbx)	return address

		40(%rbx)	reloc index
		32(%rbx)	link_map

		24(%rbx)	La_x86_64_regs pointer
		16(%rbx)	framesize
		 8(%rbx)	rax
		  (%rbx)	rbx
	*/

	movq %rax, 8(%rsp)
	movq %rsp, %rbx
	cfi_def_cfa_register(%rbx)

	/* Actively align the La_x86_64_regs structure.  */
	andq $0xfffffffffffffff0, %rsp
# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
	/* sizeof(La_x86_64_regs).  Need extra space for 8 SSE registers
	   to detect if any xmm0-xmm7 registers are changed by an audit
	   module.  */
	subq $(LR_SIZE + XMM_SIZE*8), %rsp
# else
	subq $LR_SIZE, %rsp		# sizeof(La_x86_64_regs)
# endif
	movq %rsp, 24(%rbx)

	/* Fill the La_x86_64_regs structure.  */
	movq %rdx, LR_RDX_OFFSET(%rsp)
	movq %r8,  LR_R8_OFFSET(%rsp)
	movq %r9,  LR_R9_OFFSET(%rsp)
	movq %rcx, LR_RCX_OFFSET(%rsp)
	movq %rsi, LR_RSI_OFFSET(%rsp)
	movq %rdi, LR_RDI_OFFSET(%rsp)
	movq %rbp, LR_RBP_OFFSET(%rsp)

	leaq 48(%rbx), %rax
	movq %rax, LR_RSP_OFFSET(%rsp)

	/* We always store the XMM registers even if AVX is available.
	   This is to provide backward binary compatibility for existing
	   audit modules.  */
	movaps %xmm0, (LR_XMM_OFFSET)(%rsp)
	movaps %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)
	movaps %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
	movaps %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
	movaps %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
	movaps %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
	movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
	movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

# ifndef __ILP32__
# ifdef HAVE_MPX_SUPPORT
	bndmov %bnd0, (LR_BND_OFFSET)(%rsp)		 # Preserve bound
	bndmov %bnd1, (LR_BND_OFFSET + BND_SIZE)(%rsp)	 # registers.  Nops if
	bndmov %bnd2, (LR_BND_OFFSET + BND_SIZE*2)(%rsp) # MPX not available
	bndmov %bnd3, (LR_BND_OFFSET + BND_SIZE*3)(%rsp) # or disabled.
# else
	.byte 0x66,0x0f,0x1b,0x84,0x24;.long (LR_BND_OFFSET)
	.byte 0x66,0x0f,0x1b,0x8c,0x24;.long (LR_BND_OFFSET + BND_SIZE)
	.byte 0x66,0x0f,0x1b,0x84,0x24;.long (LR_BND_OFFSET + BND_SIZE*2)
	.byte 0x66,0x0f,0x1b,0x8c,0x24;.long (LR_BND_OFFSET + BND_SIZE*3)
# endif
# endif

# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
	.data
L(have_avx):
	.zero 4
	.size L(have_avx), 4
	.previous
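
	/* L(have_avx) caches the result of the CPU check below: zero
	   means not yet determined, a negative value means AVX cannot
	   be used, a positive value means it can, and (when AVX-512
	   support is compiled in) the value 0xe6 additionally indicates
	   usable AVX-512 (zmm) state.  */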
	cmpl $0, L(have_avx)(%rip)
	jne L(defined)
	movq %rbx, %r11		# Save rbx
	movl $1, %eax
	cpuid
	movq %r11,%rbx		# Restore rbx
	xorl %eax, %eax
	// AVX and XSAVE supported?
	andl $((1 << 28) | (1 << 27)), %ecx
	cmpl $((1 << 28) | (1 << 27)), %ecx
	jne 10f
# ifdef HAVE_AVX512_ASM_SUPPORT
	// AVX512 supported in the processor?
	movq %rbx, %r11		# Save rbx
	xorl %ecx, %ecx
	mov $0x7, %eax
	cpuid
	andl $(1 << 16), %ebx
# endif
	xorl %ecx, %ecx
	// Get XFEATURE_ENABLED_MASK
	xgetbv
# ifdef HAVE_AVX512_ASM_SUPPORT
	test %ebx, %ebx
	movq %r11, %rbx		# Restore rbx
	je 20f
	// Verify that XCR0[7:5] = '111b' and
	// XCR0[2:1] = '11b' which means
	// that zmm state is enabled
	andl $0xe6, %eax
	cmpl $0xe6, %eax
	jne 20f
	movl %eax, L(have_avx)(%rip)
L(avx512):
# define RESTORE_AVX
# define VMOV vmovdqu64
# define VEC(i) zmm##i
# define MORE_CODE
# include "dl-trampoline.h"
# undef VMOV
# undef VEC
# undef RESTORE_AVX
# endif
20:	andl $0x6, %eax
10:	subl $0x5, %eax
	movl %eax, L(have_avx)(%rip)
	cmpl $0, %eax

L(defined):
	js L(no_avx)
# ifdef HAVE_AVX512_ASM_SUPPORT
	cmpl $0xe6, L(have_avx)(%rip)
	je L(avx512)
# endif

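	/* dl-trampoline.h supplies the rest of _dl_runtime_profile (the
	   call into _dl_profile_fixup and the pltexit handling).  It is
	   included up to three times with different VMOV/VEC definitions
	   to generate AVX-512, AVX and plain SSE variants of that code.  */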
# define RESTORE_AVX
# define VMOV vmovdqu
# define VEC(i) ymm##i
# define MORE_CODE
# include "dl-trampoline.h"

	.align 16
L(no_avx):
# endif

# undef RESTORE_AVX
# include "dl-trampoline.h"

	cfi_endproc
	.size _dl_runtime_profile, .-_dl_runtime_profile
#endif


#ifdef SHARED
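	/* _dl_x86_64_save_sse stores the vector argument registers
	   (xmm/ymm/zmm0-7) into the RTLD_SAVESPACE_SSE area of the
	   dynamic linker's TCB, so that ld.so code which may clobber
	   them can run; _dl_x86_64_restore_sse brings them back.  */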
	.globl _dl_x86_64_save_sse
	.type _dl_x86_64_save_sse, @function
	.align 16
	cfi_startproc
_dl_x86_64_save_sse:
# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
	cmpl $0, L(have_avx)(%rip)
	jne L(defined_5)
	movq %rbx, %r11		# Save rbx
	movl $1, %eax
	cpuid
	movq %r11,%rbx		# Restore rbx
	xorl %eax, %eax
	// AVX and XSAVE supported?
	andl $((1 << 28) | (1 << 27)), %ecx
	cmpl $((1 << 28) | (1 << 27)), %ecx
	jne 1f
# ifdef HAVE_AVX512_ASM_SUPPORT
	// AVX512 supported in the processor?
	movq %rbx, %r11		# Save rbx
	xorl %ecx,%ecx
	mov $0x7,%eax
	cpuid
	andl $(1 << 16), %ebx
# endif
	xorl %ecx, %ecx
	// Get XFEATURE_ENABLED_MASK
	xgetbv
# ifdef HAVE_AVX512_ASM_SUPPORT
	test %ebx, %ebx
	movq %r11, %rbx		# Restore rbx
	je 2f
	// Verify that XCR0[7:5] = '111b' and
	// XCR0[2:1] = '11b' which means
	// that zmm state is enabled
	andl $0xe6, %eax
	movl %eax, L(have_avx)(%rip)
	cmpl $0xe6, %eax
	je L(avx512_5)
# endif

2:	andl $0x6, %eax
1:	subl $0x5, %eax
	movl %eax, L(have_avx)(%rip)
	cmpl $0, %eax

L(defined_5):
	js L(no_avx5)
# ifdef HAVE_AVX512_ASM_SUPPORT
	cmpl $0xe6, L(have_avx)(%rip)
	je L(avx512_5)
# endif

	vmovdqa %ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE
	vmovdqa %ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE
	vmovdqa %ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE
	vmovdqa %ymm3, %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE
	vmovdqa %ymm4, %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE
	vmovdqa %ymm5, %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE
	vmovdqa %ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE
	vmovdqa %ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE
	ret
# ifdef HAVE_AVX512_ASM_SUPPORT
L(avx512_5):
	vmovdqu64 %zmm0, %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE
	vmovdqu64 %zmm1, %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE
	vmovdqu64 %zmm2, %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE
	vmovdqu64 %zmm3, %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE
	vmovdqu64 %zmm4, %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE
	vmovdqu64 %zmm5, %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE
	vmovdqu64 %zmm6, %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE
	vmovdqu64 %zmm7, %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE
	ret
# endif
L(no_avx5):
# endif
	movdqa %xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE
	movdqa %xmm1, %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE
	movdqa %xmm2, %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE
	movdqa %xmm3, %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE
	movdqa %xmm4, %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE
	movdqa %xmm5, %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE
	movdqa %xmm6, %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE
	movdqa %xmm7, %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE
	ret
	cfi_endproc
	.size _dl_x86_64_save_sse, .-_dl_x86_64_save_sse

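	/* Restore the vector argument registers saved by
	   _dl_x86_64_save_sse above.  */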
	.globl _dl_x86_64_restore_sse
	.type _dl_x86_64_restore_sse, @function
	.align 16
	cfi_startproc
_dl_x86_64_restore_sse:
# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
	cmpl $0, L(have_avx)(%rip)
	js L(no_avx6)
# ifdef HAVE_AVX512_ASM_SUPPORT
	cmpl $0xe6, L(have_avx)(%rip)
	je L(avx512_6)
# endif

	vmovdqa %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0
	vmovdqa %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1
	vmovdqa %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE, %ymm2
	vmovdqa %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE, %ymm3
	vmovdqa %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE, %ymm4
	vmovdqa %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE, %ymm5
	vmovdqa %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6
	vmovdqa %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7
	ret
# ifdef HAVE_AVX512_ASM_SUPPORT
L(avx512_6):
	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE, %zmm0
	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE, %zmm1
	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE, %zmm2
	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE, %zmm3
	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE, %zmm4
	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE, %zmm5
	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE, %zmm6
	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE, %zmm7
	ret
# endif
L(no_avx6):
# endif
	movdqa %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0
	movdqa %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE, %xmm1
	movdqa %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE, %xmm2
	movdqa %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE, %xmm3
	movdqa %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE, %xmm4
	movdqa %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE, %xmm5
	movdqa %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE, %xmm6
	movdqa %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE, %xmm7
	ret
	cfi_endproc
	.size _dl_x86_64_restore_sse, .-_dl_x86_64_restore_sse
#endif