/* Partial PLT profile trampoline to save and restore x86-64 vector
   registers.
   Copyright (C) 2009-2015 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

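/* This fragment is included from dl-trampoline.S in the body of
   _dl_runtime_profile, after the incoming argument registers have been
   spilled into a La_x86_64_regs structure on the stack (%rsp points at
   that structure; %rbx points at a spill area holding, in order, the
   saved rbx, rax, the framesize slot, the La_x86_64_regs pointer, the
   link_map, the reloc index and the return address).  RESTORE_AVX, VMOV,
   %VEC() and the LR_*/LRV_* offsets are provided by the includer.

   Overall flow: call _dl_profile_fixup to resolve the PLT entry and run
   the auditors' la_pltenter hooks, restore the (possibly modified)
   argument registers, then either tail-call the resolved function or, if
   an auditor asked to see the return value as well, call the function
   here and report its result through _dl_call_pltexit.

   For orientation only, the pltenter hook served by this code has the
   following shape (as documented in rtld-audit(7); it is not defined in
   this file):

     Elf64_Addr
     la_x86_64_gnu_pltenter (Elf64_Sym *sym, unsigned int ndx,
                             uintptr_t *refcook, uintptr_t *defcook,
                             La_x86_64_regs *regs, unsigned int *flags,
                             const char *symname, long int *framesizep);

   Writing a non-negative value through *framesizep is what selects the
   _dl_call_pltexit path below.  */
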
#ifdef RESTORE_AVX
        /* This is to support AVX audit modules.  */
        VMOV %VEC(0), (LR_VECTOR_OFFSET)(%rsp)
        VMOV %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
        VMOV %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
        VMOV %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
        VMOV %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
        VMOV %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
        VMOV %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
        VMOV %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)

        /* Save xmm0-xmm7 registers to detect if any of them are
           changed by audit module.  */
        vmovdqa %xmm0, (LR_SIZE)(%rsp)
        vmovdqa %xmm1, (LR_SIZE + XMM_SIZE)(%rsp)
        vmovdqa %xmm2, (LR_SIZE + XMM_SIZE*2)(%rsp)
        vmovdqa %xmm3, (LR_SIZE + XMM_SIZE*3)(%rsp)
        vmovdqa %xmm4, (LR_SIZE + XMM_SIZE*4)(%rsp)
        vmovdqa %xmm5, (LR_SIZE + XMM_SIZE*5)(%rsp)
        vmovdqa %xmm6, (LR_SIZE + XMM_SIZE*6)(%rsp)
        vmovdqa %xmm7, (LR_SIZE + XMM_SIZE*7)(%rsp)
#endif

        mov %RSP_LP, %RCX_LP    # La_x86_64_regs pointer to %rcx.
        mov 48(%rbx), %RDX_LP   # Load return address if needed.
        mov 40(%rbx), %RSI_LP   # Copy args pushed by the PLT into registers.
        mov 32(%rbx), %RDI_LP   # %rdi: link_map, %rsi: reloc_index
        lea 16(%rbx), %R8_LP    # Address of framesize
        call _dl_profile_fixup  # Call resolver.

        mov %RAX_LP, %R11_LP    # Save return value.

        movq 8(%rbx), %rax      # Get back register content.
        movq LR_RDX_OFFSET(%rsp), %rdx
        movq LR_R8_OFFSET(%rsp), %r8
        movq LR_R9_OFFSET(%rsp), %r9

        movaps (LR_XMM_OFFSET)(%rsp), %xmm0
        movaps (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
        movaps (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
        movaps (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
        movaps (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
        movaps (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
        movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
        movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7

#ifndef __ILP32__
# ifdef HAVE_MPX_SUPPORT
        bndmov (LR_BND_OFFSET)(%rsp), %bnd0             # Restore bound
        bndmov (LR_BND_OFFSET + BND_SIZE)(%rsp), %bnd1  # registers.
        bndmov (LR_BND_OFFSET + BND_SIZE*2)(%rsp), %bnd2
        bndmov (LR_BND_OFFSET + BND_SIZE*3)(%rsp), %bnd3
# else
        /* The same BNDMOV loads, hand-encoded for assemblers that do not
           know the MPX instructions.  */
        .byte 0x66,0x0f,0x1a,0x84,0x24;.long (LR_BND_OFFSET)
        .byte 0x66,0x0f,0x1a,0x8c,0x24;.long (LR_BND_OFFSET + BND_SIZE)
        .byte 0x66,0x0f,0x1a,0x94,0x24;.long (LR_BND_OFFSET + BND_SIZE*2)
        .byte 0x66,0x0f,0x1a,0x9c,0x24;.long (LR_BND_OFFSET + BND_SIZE*3)
# endif
#endif

#ifdef RESTORE_AVX
        /* Check if any xmm0-xmm7 registers are changed by audit
           module.  */
        vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm8
        vpmovmskb %xmm8, %esi
        cmpl $0xffff, %esi
        je 2f
        vmovdqa %xmm0, (LR_VECTOR_OFFSET)(%rsp)
        jmp 1f
2:      VMOV (LR_VECTOR_OFFSET)(%rsp), %VEC(0)
        vmovdqa %xmm0, (LR_XMM_OFFSET)(%rsp)

1:      vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
        vpmovmskb %xmm8, %esi
        cmpl $0xffff, %esi
        je 2f
        vmovdqa %xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
        jmp 1f
2:      VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1)
        vmovdqa %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)

1:      vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
        vpmovmskb %xmm8, %esi
        cmpl $0xffff, %esi
        je 2f
        vmovdqa %xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
        jmp 1f
2:      VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2)
        vmovdqa %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)

1:      vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
        vpmovmskb %xmm8, %esi
        cmpl $0xffff, %esi
        je 2f
        vmovdqa %xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
        jmp 1f
2:      VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3)
        vmovdqa %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)

1:      vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
        vpmovmskb %xmm8, %esi
        cmpl $0xffff, %esi
        je 2f
        vmovdqa %xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
        jmp 1f
2:      VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4)
        vmovdqa %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)

1:      vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
        vpmovmskb %xmm8, %esi
        cmpl $0xffff, %esi
        je 2f
        vmovdqa %xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
        jmp 1f
2:      VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5)
        vmovdqa %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)

1:      vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
        vpmovmskb %xmm8, %esi
        cmpl $0xffff, %esi
        je 2f
        vmovdqa %xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
        jmp 1f
2:      VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6)
        vmovdqa %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)

1:      vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
        vpmovmskb %xmm8, %esi
        cmpl $0xffff, %esi
        je 2f
        vmovdqa %xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
        jmp 1f
2:      VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7)
        vmovdqa %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

1:
#endif
        mov 16(%rbx), %R10_LP   # Anything in framesize?
        test %R10_LP, %R10_LP
        jns 3f

        /* The framesize is negative, so there will be no call to
           _dl_call_pltexit.  Restore the remaining argument registers
           and jump straight to the resolved function.  */

        /* Get back registers content.  */
        movq LR_RCX_OFFSET(%rsp), %rcx
        movq LR_RSI_OFFSET(%rsp), %rsi
        movq LR_RDI_OFFSET(%rsp), %rdi

        movq %rbx, %rsp
        movq (%rsp), %rbx
        cfi_restore(rbx)
        cfi_def_cfa_register(%rsp)

        addq $48, %rsp          # Adjust the stack to the return value
                                # (eats the reloc index and link_map)
        cfi_adjust_cfa_offset(-48)
        jmp *%r11               # Jump to function address.

3:
        cfi_adjust_cfa_offset(48)
        cfi_rel_offset(%rbx, 0)
        cfi_def_cfa_register(%rbx)

        /* At this point we need to prepare a new stack frame for the
           function that is about to be called.  We copy the original
           stack frame to a temporary buffer of the size specified by
           the 'framesize' returned from _dl_profile_fixup.  */

        leaq LR_RSP_OFFSET(%rbx), %rsi          # stack
        addq $8, %r10                           # Round the copy size up so
        andq $0xfffffffffffffff0, %r10          # %rsp stays 16-byte aligned.
        movq %r10, %rcx
        subq %r10, %rsp                         # Allocate the copy.
        movq %rsp, %rdi
        shrq $3, %rcx                           # Copy 8 bytes at a time.
        rep
        movsq

        movq 24(%rdi), %rcx     # Get back register content (%rdi ended up
        movq 32(%rdi), %rsi     # back on the La_x86_64_regs after the copy;
        movq 40(%rdi), %rdi     # 24/32/40 are the rcx/rsi/rdi slots).

        call *%r11

        mov 24(%rbx), %rsp      # Drop the copied stack content

        /* Now we have to prepare the La_x86_64_retval structure for
           _dl_call_pltexit.  %rsp points at the La_x86_64_regs again, so
           we just need to allocate sizeof(La_x86_64_retval) bytes on the
           stack; the alignment has already been taken care of.  */
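
        /* For reference, the LRV_*_OFFSET constants used below index the
           La_x86_64_retval structure from <bits/link.h>.  Roughly (member
           types abridged; the exact layout depends on the glibc
           configuration, so treat this as a sketch, not the authoritative
           definition):

             typedef struct La_x86_64_retval
             {
               uint64_t lrv_rax;
               uint64_t lrv_rdx;
               La_x86_64_xmm lrv_xmm0;
               La_x86_64_xmm lrv_xmm1;
               long double lrv_st0;
               long double lrv_st1;
               La_x86_64_vector lrv_vector0;    -- with AVX support
               La_x86_64_vector lrv_vector1;
               __int128_t lrv_bnd0;             -- with MPX support
               __int128_t lrv_bnd1;
             } La_x86_64_retval;  */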
#ifdef RESTORE_AVX
        /* sizeof(La_x86_64_retval).  Need extra space for 2 SSE
           registers to detect if xmm0/xmm1 registers are changed
           by audit module.  */
        subq $(LRV_SIZE + XMM_SIZE*2), %rsp
#else
        subq $LRV_SIZE, %rsp            # sizeof(La_x86_64_retval)
#endif
        movq %rsp, %rcx                 # La_x86_64_retval argument to %rcx.

        /* Fill in the La_x86_64_retval structure.  */
        movq %rax, LRV_RAX_OFFSET(%rcx)
        movq %rdx, LRV_RDX_OFFSET(%rcx)

        movaps %xmm0, LRV_XMM0_OFFSET(%rcx)
        movaps %xmm1, LRV_XMM1_OFFSET(%rcx)

#ifdef RESTORE_AVX
        /* This is to support AVX audit modules.  */
        VMOV %VEC(0), LRV_VECTOR0_OFFSET(%rcx)
        VMOV %VEC(1), LRV_VECTOR1_OFFSET(%rcx)

        /* Save xmm0/xmm1 registers to detect if they are changed
           by audit module.  */
        vmovdqa %xmm0, (LRV_SIZE)(%rcx)
        vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx)
#endif

#ifndef __ILP32__
# ifdef HAVE_MPX_SUPPORT
        bndmov %bnd0, LRV_BND0_OFFSET(%rcx)     # Preserve returned bounds.
        bndmov %bnd1, LRV_BND1_OFFSET(%rcx)
# else
        /* The same BNDMOV stores, hand-encoded for assemblers that do not
           know the MPX instructions.  */
        .byte 0x66,0x0f,0x1b,0x81;.long (LRV_BND0_OFFSET)
        .byte 0x66,0x0f,0x1b,0x89;.long (LRV_BND1_OFFSET)
# endif
#endif

        fstpt LRV_ST0_OFFSET(%rcx)
        fstpt LRV_ST1_OFFSET(%rcx)

        movq 24(%rbx), %rdx     # La_x86_64_regs argument to %rdx.
        movq 40(%rbx), %rsi     # Copy args pushed by the PLT into registers.
        movq 32(%rbx), %rdi     # %rdi: link_map, %rsi: reloc_index
        call _dl_call_pltexit

        /* Restore return registers.  */
        movq LRV_RAX_OFFSET(%rsp), %rax
        movq LRV_RDX_OFFSET(%rsp), %rdx

        movaps LRV_XMM0_OFFSET(%rsp), %xmm0
        movaps LRV_XMM1_OFFSET(%rsp), %xmm1

#ifdef RESTORE_AVX
        /* Check if xmm0/xmm1 registers are changed by audit module.  */
        vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2
        vpmovmskb %xmm2, %esi
        cmpl $0xffff, %esi
        jne 1f
        VMOV LRV_VECTOR0_OFFSET(%rsp), %VEC(0)

1:      vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
        vpmovmskb %xmm2, %esi
        cmpl $0xffff, %esi
        jne 1f
        VMOV LRV_VECTOR1_OFFSET(%rsp), %VEC(1)

1:
#endif
280 | ||
281 | #ifndef __ILP32__ | |
282 | # ifdef HAVE_MPX_SUPPORT | |
283 | bndmov LRV_BND0_OFFSET(%rcx), %bnd0 # Restore bound registers. | |
284 | bndmov LRV_BND1_OFFSET(%rcx), %bnd1 | |
285 | # else | |
286 | .byte 0x66,0x0f,0x1a,0x81;.long (LRV_BND0_OFFSET) | |
287 | .byte 0x66,0x0f,0x1a,0x89;.long (LRV_BND1_OFFSET) | |
288 | # endif | |
c88f1766 | 289 | #endif |

        fldt LRV_ST1_OFFSET(%rsp)
        fldt LRV_ST0_OFFSET(%rsp)

        movq %rbx, %rsp
        movq (%rsp), %rbx
        cfi_restore(rbx)
        cfi_def_cfa_register(%rsp)

        addq $48, %rsp          # Adjust the stack to the return value
                                # (eats the reloc index and link_map)
        cfi_adjust_cfa_offset(-48)
        retq

#ifdef MORE_CODE
        /* More code follows when the includer defines MORE_CODE, so
           re-establish the CFI state that the epilogues above tore down.  */
        cfi_adjust_cfa_offset(48)
        cfi_rel_offset(%rbx, 0)
        cfi_def_cfa_register(%rbx)
# undef MORE_CODE
#endif