/* PLT trampolines.  x86-64 version.
   Copyright (C) 2004, 2005, 2007, 2009, 2011 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <config.h>
#include <sysdep.h>
#include <link-defines.h>

23 | .text | |
24 | .globl _dl_runtime_resolve | |
25 | .type _dl_runtime_resolve, @function | |
26 | .align 16 | |
27 | cfi_startproc | |
28 | _dl_runtime_resolve: | |
0276a718 | 29 | cfi_adjust_cfa_offset(16) # Incorporate PLT |
9dcafc55 | 30 | subq $56,%rsp |
0276a718 | 31 | cfi_adjust_cfa_offset(56) |
9dcafc55 UD |
32 | movq %rax,(%rsp) # Preserve registers otherwise clobbered. |
33 | movq %rcx, 8(%rsp) | |
34 | movq %rdx, 16(%rsp) | |
35 | movq %rsi, 24(%rsp) | |
36 | movq %rdi, 32(%rsp) | |
37 | movq %r8, 40(%rsp) | |
38 | movq %r9, 48(%rsp) | |
39 | movq 64(%rsp), %rsi # Copy args pushed by PLT in register. | |
906dd40d | 40 | movq 56(%rsp), %rdi # %rdi: link_map, %rsi: reloc_index |
9dcafc55 UD |
41 | call _dl_fixup # Call resolver. |
42 | movq %rax, %r11 # Save return value | |
43 | movq 48(%rsp), %r9 # Get register content back. | |
44 | movq 40(%rsp), %r8 | |
45 | movq 32(%rsp), %rdi | |
46 | movq 24(%rsp), %rsi | |
47 | movq 16(%rsp), %rdx | |
48 | movq 8(%rsp), %rcx | |
49 | movq (%rsp), %rax | |
50 | addq $72, %rsp # Adjust stack(PLT did 2 pushes) | |
51 | cfi_adjust_cfa_offset(-72) | |
52 | jmp *%r11 # Jump to function address. | |
53 | cfi_endproc | |
54 | .size _dl_runtime_resolve, .-_dl_runtime_resolve | |
55 | ||
56 | ||
#ifndef PROF
	.globl _dl_runtime_profile
	.type _dl_runtime_profile, @function
	.align 16
	cfi_startproc

/* Profiling/auditing trampoline.  Builds a La_x86_64_regs structure
   on a dynamically aligned stack frame and hands control to the code
   in dl-trampoline.h, which is included twice: once with RESTORE_AVX
   defined (AVX path) and once without (plain SSE path).  */
_dl_runtime_profile:
	cfi_adjust_cfa_offset(16)	# Incorporate PLT
	/* The La_x86_64_regs data structure pointed to by the
	   fourth paramater must be 16-byte aligned.  This must
	   be explicitly enforced.  We have the set up a dynamically
	   sized stack frame.  %rbx points to the top half which
	   has a fixed size and preserves the original stack pointer.  */

	subq $32, %rsp		# Allocate the local storage.
	cfi_adjust_cfa_offset(32)
	movq %rbx, (%rsp)
	cfi_rel_offset(%rbx, 0)

	/* On the stack:
		56(%rbx)	parameter #1
		48(%rbx)	return address

		40(%rbx)	reloc index
		32(%rbx)	link_map

		24(%rbx)	La_x86_64_regs pointer
		16(%rbx)	framesize
		 8(%rbx)	rax
		  (%rbx)	rbx
	*/

	movq %rax, 8(%rsp)
	movq %rsp, %rbx
	cfi_def_cfa_register(%rbx)

	/* Actively align the La_x86_64_regs structure.  */
	andq $0xfffffffffffffff0, %rsp
# ifdef HAVE_AVX_SUPPORT
	/* sizeof(La_x86_64_regs).  Need extra space for 8 SSE registers
	   to detect if any xmm0-xmm7 registers are changed by audit
	   module.  */
	subq $(LR_SIZE + XMM_SIZE*8), %rsp
# else
	subq $LR_SIZE, %rsp	# sizeof(La_x86_64_regs)
# endif
	movq %rsp, 24(%rbx)

	/* Fill the La_x86_64_regs structure.  */
	movq %rdx, LR_RDX_OFFSET(%rsp)
	movq %r8,  LR_R8_OFFSET(%rsp)
	movq %r9,  LR_R9_OFFSET(%rsp)
	movq %rcx, LR_RCX_OFFSET(%rsp)
	movq %rsi, LR_RSI_OFFSET(%rsp)
	movq %rdi, LR_RDI_OFFSET(%rsp)
	movq %rbp, LR_RBP_OFFSET(%rsp)

	leaq 48(%rbx), %rax
	movq %rax, LR_RSP_OFFSET(%rsp)

	/* We always store the XMM registers even if AVX is available.
	   This is to provide backward binary compatility for existing
	   audit modules.  */
	movaps %xmm0,		   (LR_XMM_OFFSET)(%rsp)
	movaps %xmm1, (LR_XMM_OFFSET +   XMM_SIZE)(%rsp)
	movaps %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
	movaps %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
	movaps %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
	movaps %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
	movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
	movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

# ifdef HAVE_AVX_SUPPORT
	/* Cached AVX availability: 0 = not yet determined,
	   positive = AVX usable, negative = AVX not usable.  */
	.data
L(have_avx):
	.zero 4
	.size L(have_avx), 4
	.previous

	cmpl	$0, L(have_avx)(%rip)
	jne	1f
	movq	%rbx, %r11		# Save rbx
	movl	$1, %eax
	cpuid
	movq	%r11,%rbx		# Restore rbx
	xorl	%eax, %eax
	// AVX and XSAVE supported?
	andl	$((1 << 28) | (1 << 27)), %ecx
	cmpl	$((1 << 28) | (1 << 27)), %ecx
	jne	2f
	xorl	%ecx, %ecx
	// Get XFEATURE_ENABLED_MASK
	xgetbv
	andl	$0x6, %eax
	/* %eax is 6 iff the OS enabled both SSE and AVX state saving;
	   subtracting 5 leaves a positive value only in that case.  */
2:	subl	$0x5, %eax
	movl	%eax, L(have_avx)(%rip)
	cmpl	$0, %eax

1:	js	L(no_avx)

# define RESTORE_AVX
# define MORE_CODE
# include "dl-trampoline.h"

	.align 16
L(no_avx):
# endif

# undef RESTORE_AVX
# include "dl-trampoline.h"

	cfi_endproc
	.size _dl_runtime_profile, .-_dl_runtime_profile
#endif
b48a267b UD |
171 | |
172 | ||
#ifdef SHARED
	.globl _dl_x86_64_save_sse
	.type _dl_x86_64_save_sse, @function
	.align 16
	cfi_startproc
/* Save the first eight vector argument registers into the per-thread
   area at %fs:RTLD_SAVESPACE_SSE.  Full %ymm registers are saved when
   the cached L(have_avx) flag is positive, plain %xmm otherwise; the
   flag is computed lazily via CPUID/XGETBV on first use.  */
_dl_x86_64_save_sse:
# ifdef HAVE_AVX_SUPPORT
	cmpl	$0, L(have_avx)(%rip)
	jne	1f
	movq	%rbx, %r11		# Save rbx
	movl	$1, %eax
	cpuid
	movq	%r11,%rbx		# Restore rbx
	xorl	%eax, %eax
	// AVX and XSAVE supported?
	andl	$((1 << 28) | (1 << 27)), %ecx
	cmpl	$((1 << 28) | (1 << 27)), %ecx
	jne	2f
	xorl	%ecx, %ecx
	// Get XFEATURE_ENABLED_MASK
	xgetbv
	andl	$0x6, %eax
	cmpl	$0x6, %eax
	// Nonzero if SSE and AVX state saving is enabled.
	sete	%al
	/* Map %eax (0 or 1) to -1 or +1 for the cached flag.  */
2:	leal	-1(%eax,%eax), %eax
	movl	%eax, L(have_avx)(%rip)
	cmpl	$0, %eax

1:	js	L(no_avx5)

# define YMM_SIZE 32
	vmovdqa %ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE
	vmovdqa %ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE
	vmovdqa %ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE
	vmovdqa %ymm3, %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE
	vmovdqa %ymm4, %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE
	vmovdqa %ymm5, %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE
	vmovdqa %ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE
	vmovdqa %ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE
	ret
L(no_avx5):
# endif
	movdqa	%xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE
	movdqa	%xmm1, %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE
	movdqa	%xmm2, %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE
	movdqa	%xmm3, %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE
	movdqa	%xmm4, %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE
	movdqa	%xmm5, %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE
	movdqa	%xmm6, %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE
	movdqa	%xmm7, %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE
	ret
	cfi_endproc
	.size _dl_x86_64_save_sse, .-_dl_x86_64_save_sse
227 | ||
228 | ||
229 | .globl _dl_x86_64_restore_sse | |
230 | .type _dl_x86_64_restore_sse, @function | |
231 | .align 16 | |
232 | cfi_startproc | |
233 | _dl_x86_64_restore_sse: | |
234 | # ifdef HAVE_AVX_SUPPORT | |
235 | cmpl $0, L(have_avx)(%rip) | |
236 | js L(no_avx6) | |
237 | ||
238 | vmovdqa %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0 | |
239 | vmovdqa %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1 | |
240 | vmovdqa %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE, %ymm2 | |
241 | vmovdqa %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE, %ymm3 | |
242 | vmovdqa %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE, %ymm4 | |
243 | vmovdqa %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE, %ymm5 | |
244 | vmovdqa %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6 | |
245 | vmovdqa %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7 | |
246 | ret | |
247 | L(no_avx6): | |
248 | # endif | |
249 | movdqa %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0 | |
250 | movdqa %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE, %xmm1 | |
251 | movdqa %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE, %xmm2 | |
252 | movdqa %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE, %xmm3 | |
253 | movdqa %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE, %xmm4 | |
254 | movdqa %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE, %xmm5 | |
255 | movdqa %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE, %xmm6 | |
256 | movdqa %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE, %xmm7 | |
257 | ret | |
258 | cfi_endproc | |
259 | .size _dl_x86_64_restore_sse, .-_dl_x86_64_restore_sse | |
260 | #endif |