/* PLT trampolines.  x86-64 version.
   Copyright (C) 2004-2013 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <config.h>
#include <sysdep.h>
#include <link-defines.h>

/* The per-thread SSE/AVX save area is accessed with 32-byte aligned
   vmovdqa stores below, so its offset must itself be 32-byte aligned.  */
#if (RTLD_SAVESPACE_SSE % 32) != 0
# error RTLD_SAVESPACE_SSE must be aligned to 32 bytes
#endif

9dcafc55 UD |
27 | .text |
28 | .globl _dl_runtime_resolve | |
29 | .type _dl_runtime_resolve, @function | |
30 | .align 16 | |
31 | cfi_startproc | |
32 | _dl_runtime_resolve: | |
0276a718 | 33 | cfi_adjust_cfa_offset(16) # Incorporate PLT |
9dcafc55 | 34 | subq $56,%rsp |
0276a718 | 35 | cfi_adjust_cfa_offset(56) |
9dcafc55 UD |
36 | movq %rax,(%rsp) # Preserve registers otherwise clobbered. |
37 | movq %rcx, 8(%rsp) | |
38 | movq %rdx, 16(%rsp) | |
39 | movq %rsi, 24(%rsp) | |
40 | movq %rdi, 32(%rsp) | |
41 | movq %r8, 40(%rsp) | |
42 | movq %r9, 48(%rsp) | |
43 | movq 64(%rsp), %rsi # Copy args pushed by PLT in register. | |
906dd40d | 44 | movq 56(%rsp), %rdi # %rdi: link_map, %rsi: reloc_index |
9dcafc55 UD |
45 | call _dl_fixup # Call resolver. |
46 | movq %rax, %r11 # Save return value | |
47 | movq 48(%rsp), %r9 # Get register content back. | |
48 | movq 40(%rsp), %r8 | |
49 | movq 32(%rsp), %rdi | |
50 | movq 24(%rsp), %rsi | |
51 | movq 16(%rsp), %rdx | |
52 | movq 8(%rsp), %rcx | |
53 | movq (%rsp), %rax | |
54 | addq $72, %rsp # Adjust stack(PLT did 2 pushes) | |
55 | cfi_adjust_cfa_offset(-72) | |
56 | jmp *%r11 # Jump to function address. | |
57 | cfi_endproc | |
58 | .size _dl_runtime_resolve, .-_dl_runtime_resolve | |
59 | ||
60 | ||
#ifndef PROF
	.globl _dl_runtime_profile
	.type _dl_runtime_profile, @function
	.align 16
	cfi_startproc

_dl_runtime_profile:
	cfi_adjust_cfa_offset(16) # Incorporate PLT
	/* The La_x86_64_regs data structure pointed to by the
	   fourth paramater must be 16-byte aligned.  This must
	   be explicitly enforced.  We have the set up a dynamically
	   sized stack frame.  %rbx points to the top half which
	   has a fixed size and preserves the original stack pointer.  */

	subq $32, %rsp		# Allocate the local storage.
	cfi_adjust_cfa_offset(32)
	movq %rbx, (%rsp)	# %rbx is callee-saved; preserve it, it will
				# anchor the fixed part of the frame.
	cfi_rel_offset(%rbx, 0)

	/* On the stack:
		56(%rbx)	parameter #1
		48(%rbx)	return address

		40(%rbx)	reloc index
		32(%rbx)	link_map

		24(%rbx)	La_x86_64_regs pointer
		16(%rbx)	framesize
		 8(%rbx)	rax
		  (%rbx)	rbx
	*/

	movq %rax, 8(%rsp)
	movq %rsp, %rbx		# From here on %rbx = fixed frame base;
				# %rsp is free to be realigned below.
	cfi_def_cfa_register(%rbx)

	/* Actively align the La_x86_64_regs structure.  */
	andq $0xfffffffffffffff0, %rsp
# ifdef HAVE_AVX_SUPPORT
	/* sizeof(La_x86_64_regs).  Need extra space for 8 SSE registers
	   to detect if any xmm0-xmm7 registers are changed by audit
	   module.  */
	subq $(LR_SIZE + XMM_SIZE*8), %rsp
# else
	subq $LR_SIZE, %rsp		# sizeof(La_x86_64_regs)
# endif
	movq %rsp, 24(%rbx)	# Stash the La_x86_64_regs pointer.

	/* Fill the La_x86_64_regs structure.  */
	movq %rdx, LR_RDX_OFFSET(%rsp)
	movq %r8, LR_R8_OFFSET(%rsp)
	movq %r9, LR_R9_OFFSET(%rsp)
	movq %rcx, LR_RCX_OFFSET(%rsp)
	movq %rsi, LR_RSI_OFFSET(%rsp)
	movq %rdi, LR_RDI_OFFSET(%rsp)
	movq %rbp, LR_RBP_OFFSET(%rsp)

	/* 48(%rbx) holds the return address, i.e. the caller's stack
	   pointer value at the point of the PLT call.  */
	leaq 48(%rbx), %rax
	movq %rax, LR_RSP_OFFSET(%rsp)

	/* We always store the XMM registers even if AVX is available.
	   This is to provide backward binary compatibility for existing
	   audit modules.  */
	movaps %xmm0, (LR_XMM_OFFSET)(%rsp)
	movaps %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)
	movaps %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
	movaps %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
	movaps %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
	movaps %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
	movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
	movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

# ifdef HAVE_AVX_SUPPORT
	/* Cached AVX-usability flag, written once on first use:
	   0 = not yet determined, > 0 = AVX usable (CPU and OS support),
	   < 0 = AVX not usable.  */
	.data
L(have_avx):
	.zero 4
	.size L(have_avx), 4
	.previous

	cmpl	$0, L(have_avx)(%rip)
	jne	1f			# Already detected; just test sign.
	movq	%rbx, %r11		# Save rbx
	movl	$1, %eax
	cpuid				# Clobbers rbx, hence the save above.
	movq	%r11,%rbx		# Restore rbx
	xorl	%eax, %eax
	// AVX and XSAVE supported?
	andl	$((1 << 28) | (1 << 27)), %ecx
	cmpl	$((1 << 28) | (1 << 27)), %ecx
	jne	2f
	xorl	%ecx, %ecx
	// Get XFEATURE_ENABLED_MASK
	xgetbv
	andl	$0x6, %eax		# Keep XCR0 SSE (bit 1) / AVX (bit 2).
2:	subl	$0x5, %eax		# 6 -> 1 (usable); else -> negative.
	movl	%eax, L(have_avx)(%rip)
	cmpl	$0, %eax

1:	js	L(no_avx)

	/* AVX path: expand dl-trampoline.h with YMM save/restore.  */
# define RESTORE_AVX
# define MORE_CODE
# include "dl-trampoline.h"

	.align 16
L(no_avx):
# endif

	/* SSE-only path: expand dl-trampoline.h without RESTORE_AVX.  */
# undef RESTORE_AVX
# include "dl-trampoline.h"

	cfi_endproc
	.size _dl_runtime_profile, .-_dl_runtime_profile
#endif


#ifdef SHARED
	/* Save the argument vector registers into the per-thread
	   RTLD_SAVESPACE_SSE area (addressed via %fs).  Uses full YMM
	   stores when AVX is usable, XMM stores otherwise.  */
	.globl _dl_x86_64_save_sse
	.type _dl_x86_64_save_sse, @function
	.align 16
	cfi_startproc
_dl_x86_64_save_sse:
# ifdef HAVE_AVX_SUPPORT
	cmpl	$0, L(have_avx)(%rip)
	jne	1f			# Already detected; just test sign.
	movq	%rbx, %r11		# Save rbx
	movl	$1, %eax
	cpuid				# Clobbers rbx, hence the save above.
	movq	%r11,%rbx		# Restore rbx
	xorl	%eax, %eax
	// AVX and XSAVE supported?
	andl	$((1 << 28) | (1 << 27)), %ecx
	cmpl	$((1 << 28) | (1 << 27)), %ecx
	jne	2f
	xorl	%ecx, %ecx
	// Get XFEATURE_ENABLED_MASK
	xgetbv
	andl	$0x6, %eax		# Keep XCR0 SSE (bit 1) / AVX (bit 2).
	cmpl	$0x6, %eax
	// Nonzero if SSE and AVX state saving is enabled.
	sete	%al
2:	leal	-1(%eax,%eax), %eax	# 1 -> 1 (usable); 0 -> -1 (not).
	movl	%eax, L(have_avx)(%rip)
	cmpl	$0, %eax

1:	js	L(no_avx5)

# define YMM_SIZE 32
	vmovdqa %ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE
	vmovdqa %ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE
	vmovdqa %ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE
	vmovdqa %ymm3, %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE
	vmovdqa %ymm4, %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE
	vmovdqa %ymm5, %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE
	vmovdqa %ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE
	vmovdqa %ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE
	ret
L(no_avx5):
# endif
	movdqa	%xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE
	movdqa	%xmm1, %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE
	movdqa	%xmm2, %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE
	movdqa	%xmm3, %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE
	movdqa	%xmm4, %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE
	movdqa	%xmm5, %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE
	movdqa	%xmm6, %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE
	movdqa	%xmm7, %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE
	ret
	cfi_endproc
	.size _dl_x86_64_save_sse, .-_dl_x86_64_save_sse
232 | ||
233 | .globl _dl_x86_64_restore_sse | |
234 | .type _dl_x86_64_restore_sse, @function | |
235 | .align 16 | |
236 | cfi_startproc | |
237 | _dl_x86_64_restore_sse: | |
238 | # ifdef HAVE_AVX_SUPPORT | |
239 | cmpl $0, L(have_avx)(%rip) | |
240 | js L(no_avx6) | |
241 | ||
242 | vmovdqa %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0 | |
243 | vmovdqa %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1 | |
244 | vmovdqa %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE, %ymm2 | |
245 | vmovdqa %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE, %ymm3 | |
246 | vmovdqa %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE, %ymm4 | |
247 | vmovdqa %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE, %ymm5 | |
248 | vmovdqa %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6 | |
249 | vmovdqa %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7 | |
250 | ret | |
251 | L(no_avx6): | |
252 | # endif | |
253 | movdqa %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0 | |
254 | movdqa %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE, %xmm1 | |
255 | movdqa %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE, %xmm2 | |
256 | movdqa %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE, %xmm3 | |
257 | movdqa %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE, %xmm4 | |
258 | movdqa %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE, %xmm5 | |
259 | movdqa %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE, %xmm6 | |
260 | movdqa %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE, %xmm7 | |
261 | ret | |
262 | cfi_endproc | |
263 | .size _dl_x86_64_restore_sse, .-_dl_x86_64_restore_sse | |
264 | #endif |