/* PLT trampolines.  x86-64 version.
   Copyright (C) 2004-2014 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <config.h>
#include <sysdep.h>
#include <link-defines.h>
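
/* This file implements the lazy-binding trampolines _dl_runtime_resolve
   and _dl_runtime_profile and, in SHARED builds, the helpers
   _dl_x86_64_save_sse and _dl_x86_64_restore_sse that save and restore
   the first eight vector registers in thread-local storage.  */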

#if (RTLD_SAVESPACE_SSE % 32) != 0
# error RTLD_SAVESPACE_SSE must be aligned to 32 bytes
#endif
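/* (The 32-byte alignment lets the AVX paths below save the %ymm
   registers with the aligned vmovdqa instruction.)  */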

	.text
	.globl _dl_runtime_resolve
	.type _dl_runtime_resolve, @function
	.align 16
	cfi_startproc
_dl_runtime_resolve:
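	/* On entry the PLT code has pushed two 8-byte values: the
	   link_map pointer at 0(%rsp) and the relocation index at
	   8(%rsp).  These are the 16 bytes incorporated below.  */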
	cfi_adjust_cfa_offset(16)	# Incorporate PLT
	subq $56,%rsp
	cfi_adjust_cfa_offset(56)
	movq %rax,(%rsp)	# Preserve registers otherwise clobbered.
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)
	movq 64(%rsp), %rsi	# Copy args pushed by PLT into registers.
	movq 56(%rsp), %rdi	# %rdi: link_map, %rsi: reloc_index
	call _dl_fixup		# Call resolver.
	movq %rax, %r11		# Save return value.
	movq 48(%rsp), %r9	# Get register content back.
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $72, %rsp		# Adjust stack (PLT did 2 pushes).
	cfi_adjust_cfa_offset(-72)
	jmp *%r11		# Jump to function address.
	cfi_endproc
	.size _dl_runtime_resolve, .-_dl_runtime_resolve

#ifndef PROF
	.globl _dl_runtime_profile
	.type _dl_runtime_profile, @function
	.align 16
	cfi_startproc

_dl_runtime_profile:
	cfi_adjust_cfa_offset(16)	# Incorporate PLT
	/* The La_x86_64_regs data structure pointed to by the
	   fourth parameter must be 16-byte aligned.  This must
	   be explicitly enforced.  We have to set up a dynamically
	   sized stack frame.  %rbx points to the top half which
	   has a fixed size and preserves the original stack pointer.  */

	subq $32, %rsp		# Allocate the local storage.
	cfi_adjust_cfa_offset(32)
	movq %rbx, (%rsp)
	cfi_rel_offset(%rbx, 0)

	/* On the stack:
		56(%rbx)	parameter #1
		48(%rbx)	return address

		40(%rbx)	reloc index
		32(%rbx)	link_map

		24(%rbx)	La_x86_64_regs pointer
		16(%rbx)	framesize
		 8(%rbx)	rax
		  (%rbx)	rbx
	*/

	movq %rax, 8(%rsp)
	movq %rsp, %rbx
	cfi_def_cfa_register(%rbx)

	/* Actively align the La_x86_64_regs structure.  */
	andq $0xfffffffffffffff0, %rsp
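	/* (Rounding %rsp down is safe: %rbx preserves the original
	   stack pointer and the frame is unwound through it.)  */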
# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
	/* sizeof(La_x86_64_regs).  Need extra space for 8 SSE registers
	   to detect if any xmm0-xmm7 registers are changed by an audit
	   module.  */
	subq $(LR_SIZE + XMM_SIZE*8), %rsp
# else
	subq $LR_SIZE, %rsp		# sizeof(La_x86_64_regs)
# endif
	movq %rsp, 24(%rbx)

	/* Fill the La_x86_64_regs structure.  */
	movq %rdx, LR_RDX_OFFSET(%rsp)
	movq %r8,  LR_R8_OFFSET(%rsp)
	movq %r9,  LR_R9_OFFSET(%rsp)
	movq %rcx, LR_RCX_OFFSET(%rsp)
	movq %rsi, LR_RSI_OFFSET(%rsp)
	movq %rdi, LR_RDI_OFFSET(%rsp)
	movq %rbp, LR_RBP_OFFSET(%rsp)

	leaq 48(%rbx), %rax
	movq %rax, LR_RSP_OFFSET(%rsp)

	/* We always store the XMM registers even if AVX is available.
	   This is to provide backward binary compatibility for existing
	   audit modules.  */
	movaps %xmm0, (LR_XMM_OFFSET)(%rsp)
	movaps %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)
	movaps %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
	movaps %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
	movaps %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
	movaps %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
	movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
	movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
	.data
L(have_avx):
	.zero 4
	.size L(have_avx), 4
	.previous
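	/* L(have_avx) caches the result of the probe below:
	     0    - not yet determined,
	     < 0  - AVX not usable,
	     1    - the OS has enabled YMM state (take the AVX paths),
	     0xe6 - ZMM state is enabled too (take the AVX512 paths).  */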

	cmpl	$0, L(have_avx)(%rip)
	jne	L(defined)
	movq	%rbx, %r11		# Save rbx
	movl	$1, %eax
	cpuid
	movq	%r11,%rbx		# Restore rbx
	xorl	%eax, %eax
	// AVX and XSAVE supported?
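	// (CPUID.1:ECX bit 28 is AVX, bit 27 is OSXSAVE, i.e. the OS
	// uses XSAVE and the xgetbv instruction is available.)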
	andl	$((1 << 28) | (1 << 27)), %ecx
	cmpl	$((1 << 28) | (1 << 27)), %ecx
	jne	10f
#  ifdef HAVE_AVX512_ASM_SUPPORT
	// AVX512 supported by the processor?
	movq	%rbx, %r11		# Save rbx
	xorl	%ecx, %ecx
	mov	$0x7, %eax
	cpuid
	andl	$(1 << 16), %ebx
#  endif
	xorl	%ecx, %ecx
	// Get XFEATURE_ENABLED_MASK
	xgetbv
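	// (With %ecx == 0, xgetbv returns XCR0 in %edx:%eax; bit 1 is
	// SSE state, bit 2 is AVX state.)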
#  ifdef HAVE_AVX512_ASM_SUPPORT
	test	%ebx, %ebx
	movq	%r11, %rbx		# Restore rbx
	je	20f
	// Verify that XCR0[7:5] = '111b' and
	// XCR0[2:1] = '11b' which means
	// that zmm state is enabled
	andl	$0xe6, %eax
	cmpl	$0xe6, %eax
	jne	20f
	movl	%eax, L(have_avx)(%rip)
L(avx512):
#  define RESTORE_AVX
#  define VMOV		vmovdqu64
#  define VEC(i)	zmm##i
#  define MORE_CODE
#  include "dl-trampoline.h"
#  undef VMOV
#  undef VEC
#  undef RESTORE_AVX
#  endif
20:	andl	$0x6, %eax
10:	subl	$0x5, %eax
	movl	%eax, L(have_avx)(%rip)
	cmpl	$0, %eax

L(defined):
	js	L(no_avx)
#  ifdef HAVE_AVX512_ASM_SUPPORT
	cmpl	$0xe6, L(have_avx)(%rip)
	je	L(avx512)
#  endif

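	/* dl-trampoline.h expands to the trampoline body that calls
	   _dl_profile_fixup and the audit hooks; VMOV and VEC select
	   which vector flavor (zmm above, ymm here, plain xmm when
	   RESTORE_AVX is undefined) is saved and restored around it.  */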
# define RESTORE_AVX
# define VMOV		vmovdqu
# define VEC(i)		ymm##i
# define MORE_CODE
# include "dl-trampoline.h"

	.align 16
L(no_avx):
# endif

# undef RESTORE_AVX
# include "dl-trampoline.h"

	cfi_endproc
	.size _dl_runtime_profile, .-_dl_runtime_profile
#endif


#ifdef SHARED
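	/* _dl_x86_64_save_sse and _dl_x86_64_restore_sse spill the first
	   eight vector registers to the thread-local area at
	   %fs:RTLD_SAVESPACE_SSE, using the widest register set the
	   processor and OS support.  The dynamic linker uses them so its
	   own calls do not clobber a caller's vector arguments.  */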
	.globl _dl_x86_64_save_sse
	.type _dl_x86_64_save_sse, @function
	.align 16
	cfi_startproc
_dl_x86_64_save_sse:
# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
	cmpl	$0, L(have_avx)(%rip)
	jne	L(defined_5)
	movq	%rbx, %r11		# Save rbx
	movl	$1, %eax
	cpuid
	movq	%r11,%rbx		# Restore rbx
	xorl	%eax, %eax
	// AVX and XSAVE supported?
	andl	$((1 << 28) | (1 << 27)), %ecx
	cmpl	$((1 << 28) | (1 << 27)), %ecx
	jne	1f
#  ifdef HAVE_AVX512_ASM_SUPPORT
	// AVX512 supported by the processor?
	movq	%rbx, %r11		# Save rbx
	xorl	%ecx, %ecx
	mov	$0x7, %eax
	cpuid
	andl	$(1 << 16), %ebx
#  endif
	xorl	%ecx, %ecx
	// Get XFEATURE_ENABLED_MASK
	xgetbv
#  ifdef HAVE_AVX512_ASM_SUPPORT
	test	%ebx, %ebx
	movq	%r11, %rbx		# Restore rbx
	je	2f
	// Verify that XCR0[7:5] = '111b' and
	// XCR0[2:1] = '11b' which means
	// that zmm state is enabled
	andl	$0xe6, %eax
	movl	%eax, L(have_avx)(%rip)
	cmpl	$0xe6, %eax
	je	L(avx512_5)
#  endif

2:	andl	$0x6, %eax
1:	subl	$0x5, %eax
	movl	%eax, L(have_avx)(%rip)
	cmpl	$0, %eax

L(defined_5):
	js	L(no_avx5)
#  ifdef HAVE_AVX512_ASM_SUPPORT
	cmpl	$0xe6, L(have_avx)(%rip)
	je	L(avx512_5)
#  endif

	vmovdqa	%ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE
	vmovdqa	%ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE
	vmovdqa	%ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE
	vmovdqa	%ymm3, %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE
	vmovdqa	%ymm4, %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE
	vmovdqa	%ymm5, %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE
	vmovdqa	%ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE
	vmovdqa	%ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE
	ret
#  ifdef HAVE_AVX512_ASM_SUPPORT
L(avx512_5):
	vmovdqu64 %zmm0, %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE
	vmovdqu64 %zmm1, %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE
	vmovdqu64 %zmm2, %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE
	vmovdqu64 %zmm3, %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE
	vmovdqu64 %zmm4, %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE
	vmovdqu64 %zmm5, %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE
	vmovdqu64 %zmm6, %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE
	vmovdqu64 %zmm7, %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE
	ret
#  endif
L(no_avx5):
# endif
	movdqa	%xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE
	movdqa	%xmm1, %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE
	movdqa	%xmm2, %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE
	movdqa	%xmm3, %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE
	movdqa	%xmm4, %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE
	movdqa	%xmm5, %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE
	movdqa	%xmm6, %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE
	movdqa	%xmm7, %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE
	ret
	cfi_endproc
	.size _dl_x86_64_save_sse, .-_dl_x86_64_save_sse


	.globl _dl_x86_64_restore_sse
	.type _dl_x86_64_restore_sse, @function
	.align 16
	cfi_startproc
_dl_x86_64_restore_sse:
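	/* No CPU probe here: _dl_x86_64_save_sse is expected to have
	   run first, so L(have_avx) is already initialized.  */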
# if defined HAVE_AVX_SUPPORT || defined HAVE_AVX512_ASM_SUPPORT
	cmpl	$0, L(have_avx)(%rip)
	js	L(no_avx6)
#  ifdef HAVE_AVX512_ASM_SUPPORT
	cmpl	$0xe6, L(have_avx)(%rip)
	je	L(avx512_6)
#  endif

	vmovdqa	%fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0
	vmovdqa	%fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1
	vmovdqa	%fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE, %ymm2
	vmovdqa	%fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE, %ymm3
	vmovdqa	%fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE, %ymm4
	vmovdqa	%fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE, %ymm5
	vmovdqa	%fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6
	vmovdqa	%fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7
	ret
#  ifdef HAVE_AVX512_ASM_SUPPORT
L(avx512_6):
	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE, %zmm0
	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE, %zmm1
	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE, %zmm2
	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE, %zmm3
	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE, %zmm4
	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE, %zmm5
	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE, %zmm6
	vmovdqu64 %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE, %zmm7
	ret
#  endif
L(no_avx6):
# endif
	movdqa	%fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0
	movdqa	%fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE, %xmm1
	movdqa	%fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE, %xmm2
	movdqa	%fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE, %xmm3
	movdqa	%fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE, %xmm4
	movdqa	%fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE, %xmm5
	movdqa	%fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE, %xmm6
	movdqa	%fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE, %xmm7
	ret
	cfi_endproc
	.size _dl_x86_64_restore_sse, .-_dl_x86_64_restore_sse
#endif